<!-- NOTE(review): the two lines below were stray text appearing before the doctype
     (a scrape/mirror artifact); wrapped in a comment so the document begins with
     <!DOCTYPE html> as required for standards mode.
CINXE.COM
Search | arXiv e-print repository
-->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 228 results for author: <span class="mathjax">Jain, N</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Jain, N"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input 
checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jain%2C+N&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jain, N"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option 
selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jain%2C+N&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a 
href="https://arxiv.org/abs/2502.19976">arXiv:2502.19976</a> <span> [<a href="https://arxiv.org/pdf/2502.19976">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Efficient Machine Learning Approach for Yield Prediction in Chemical Reactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ghosh%2C+S">Supratim Ghosh</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nupur Jain</a>, <a href="/search/?searchtype=author&query=Sunoj%2C+R+B">Raghavan B. Sunoj</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19976v1-abstract-short" style="display: inline;"> Developing machine learning (ML) models for yield prediction of chemical reactions has emerged as an important use case scenario in very recent years. In this space, reaction datasets present a range of challenges mostly stemming from imbalance and sparsity. Herein, we consider chemical language representations for reactions to tap into the potential of natural language processing models such as t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19976v1-abstract-full').style.display = 'inline'; document.getElementById('2502.19976v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.19976v1-abstract-full" style="display: none;"> Developing machine learning (ML) models for yield prediction of chemical reactions has emerged as an important use case scenario in very recent years. In this space, reaction datasets present a range of challenges mostly stemming from imbalance and sparsity. 
Herein, we consider chemical language representations for reactions to tap into the potential of natural language processing models such as the ULMFiT (Universal Language Model Fine Tuning) for yield prediction, which is customized to work across such distribution settings. We contribute a new reaction dataset with more than 860 manually curated reactions collected from literature spanning over a decade, belonging to a family of catalytic meta-C(sp2)-H bond activation reactions of high contemporary importance. Taking cognizance of the dataset size, skewness toward the higher yields, and the sparse distribution characteristics, we developed a new (i) time- and resource-efficient pre-training strategy for downstream transfer learning, and (ii) the CFR (classification followed by regression) model that offers state-of-the-art yield predictions, surpassing conventional direct regression (DR) approaches. Instead of the prevailing pre-training practice of using a large number of unlabeled molecules (1.4 million) from the ChEMBL dataset, we first created a pre-training dataset SSP1 (0.11 million), by using a substructure-based mining from the PubChem database, which is found to be equally effective and more time-efficient in offering enhanced performance. The CFR model with the ULMFiT-SSP1 regressor achieved an impressive RMSE of 8.40 for the CFR-major and 6.48 for the CFR-minor class in yield prediction on the title reaction, with a class boundary of yield at 53 %. Furthermore, the CFR model is highly generalizable as evidenced by the significant improvement over the previous benchmark reaction datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19976v1-abstract-full').style.display = 'none'; document.getElementById('2502.19976v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12444">arXiv:2502.12444</a> <span> [<a href="https://arxiv.org/pdf/2502.12444">pdf</a>, <a href="https://arxiv.org/format/2502.12444">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> SparAMX: Accelerating Compressed LLMs Token Generation on AMX-powered CPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=AbouElhamayed%2C+A+F">Ahmed F. AbouElhamayed</a>, <a href="/search/?searchtype=author&query=Dotzel%2C+J">Jordan Dotzel</a>, <a href="/search/?searchtype=author&query=Akhauri%2C+Y">Yash Akhauri</a>, <a href="/search/?searchtype=author&query=Chang%2C+C">Chi-Chih Chang</a>, <a href="/search/?searchtype=author&query=Gobriel%2C+S">Sameh Gobriel</a>, <a href="/search/?searchtype=author&query=Mu%C3%B1oz%2C+J+P">J. 
Pablo Muñoz</a>, <a href="/search/?searchtype=author&query=Chua%2C+V+S">Vui Seng Chua</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a>, <a href="/search/?searchtype=author&query=Abdelfattah%2C+M+S">Mohamed S. Abdelfattah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12444v1-abstract-short" style="display: inline;"> Large language models have high compute, latency, and memory requirements. While specialized accelerators such as GPUs and TPUs typically run these workloads, CPUs are more widely available and consume less energy. Accelerating LLMs with CPUs enables broader AI access at a lower cost and power consumption. This acceleration potential for CPUs is especially relevant during the memory-bound decoding… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12444v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12444v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12444v1-abstract-full" style="display: none;"> Large language models have high compute, latency, and memory requirements. While specialized accelerators such as GPUs and TPUs typically run these workloads, CPUs are more widely available and consume less energy. Accelerating LLMs with CPUs enables broader AI access at a lower cost and power consumption. This acceleration potential for CPUs is especially relevant during the memory-bound decoding stage of LLM inference, which processes one token at a time and is becoming increasingly utilized with reasoning models. 
We utilize Advanced Matrix Extensions (AMX) support on the latest Intel CPUs together with unstructured sparsity to achieve a $1.42 \times$ reduction in end-to-end latency compared to the current PyTorch implementation by applying our technique in linear layers. We provide a set of open-source customized sparse kernels that can speed up any PyTorch model by automatically replacing all linear layers with our custom sparse implementation. Furthermore, we demonstrate for the first time the use of unstructured sparsity in the attention computation achieving a $1.14 \times$ speedup over the current systems without compromising accuracy. Code: https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning/tree/main/SparAMX <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12444v1-abstract-full').style.display = 'none'; document.getElementById('2502.12444v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09328">arXiv:2502.09328</a> <span> [<a href="https://arxiv.org/pdf/2502.09328">pdf</a>, <a href="https://arxiv.org/format/2502.09328">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Copilot Arena: A Platform for Code LLM Evaluation in the Wild </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chi%2C+W">Wayne Chi</a>, <a href="/search/?searchtype=author&query=Chen%2C+V">Valerie Chen</a>, <a href="/search/?searchtype=author&query=Angelopoulos%2C+A+N">Anastasios Nikolas Angelopoulos</a>, <a href="/search/?searchtype=author&query=Chiang%2C+W">Wei-Lin Chiang</a>, <a href="/search/?searchtype=author&query=Mittal%2C+A">Aditya Mittal</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Zhang%2C+T">Tianjun Zhang</a>, <a href="/search/?searchtype=author&query=Stoica%2C+I">Ion Stoica</a>, <a href="/search/?searchtype=author&query=Donahue%2C+C">Chris Donahue</a>, <a href="/search/?searchtype=author&query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09328v1-abstract-short" style="display: inline;"> Evaluating in-the-wild coding capabilities of large language models (LLMs) is a challenging endeavor with no clear solution. We introduce Copilot Arena, a platform to collect user preferences for code generation through native integration into a developer's working environment. 
Copilot Arena comprises a novel interface for comparing pairs of model outputs, a sampling strategy optimized to reduce l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09328v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09328v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09328v1-abstract-full" style="display: none;"> Evaluating in-the-wild coding capabilities of large language models (LLMs) is a challenging endeavor with no clear solution. We introduce Copilot Arena, a platform to collect user preferences for code generation through native integration into a developer's working environment. Copilot Arena comprises a novel interface for comparing pairs of model outputs, a sampling strategy optimized to reduce latency, and a prompting scheme to enable code completion functionality. Copilot Arena has served over 4.5 million suggestions from 10 models and collected over 11k pairwise judgements. Our results highlight the importance of model evaluations in integrated settings. We find that model rankings from Copilot Arena differ from those of existing evaluations, which we attribute to the more realistic distribution of data and tasks contained in Copilot Arena. We also identify novel insights into human preferences on code such as an observed consistency in user preference across programming languages yet significant variation in preference due to task category. We open-source Copilot Arena and release data to enable human-centric evaluations and improve understanding of coding assistants. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09328v1-abstract-full').style.display = 'none'; document.getElementById('2502.09328v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08145">arXiv:2502.08145</a> <span> [<a href="https://arxiv.org/pdf/2502.08145">pdf</a>, <a href="https://arxiv.org/format/2502.08145">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Democratizing AI: Open-source Scalable LLM Training on GPU-based Supercomputers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Singh%2C+S">Siddharth Singh</a>, <a href="/search/?searchtype=author&query=Singhania%2C+P">Prajwal Singhania</a>, <a href="/search/?searchtype=author&query=Ranjan%2C+A">Aditya Ranjan</a>, <a href="/search/?searchtype=author&query=Kirchenbauer%2C+J">John Kirchenbauer</a>, <a href="/search/?searchtype=author&query=Geiping%2C+J">Jonas Geiping</a>, <a href="/search/?searchtype=author&query=Wen%2C+Y">Yuxin Wen</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Hans%2C+A">Abhimanyu Hans</a>, <a href="/search/?searchtype=author&query=Shu%2C+M">Manli Shu</a>, <a 
href="/search/?searchtype=author&query=Tomar%2C+A">Aditya Tomar</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a>, <a href="/search/?searchtype=author&query=Bhatele%2C+A">Abhinav Bhatele</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08145v1-abstract-short" style="display: inline;"> Training and fine-tuning large language models (LLMs) with hundreds of billions to trillions of parameters requires tens of thousands of GPUs, and a highly scalable software stack. In this work, we present a novel four-dimensional hybrid parallel algorithm implemented in a highly scalable, portable, open-source framework called AxoNN. We describe several performance optimizations in AxoNN to impro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08145v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08145v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08145v1-abstract-full" style="display: none;"> Training and fine-tuning large language models (LLMs) with hundreds of billions to trillions of parameters requires tens of thousands of GPUs, and a highly scalable software stack. In this work, we present a novel four-dimensional hybrid parallel algorithm implemented in a highly scalable, portable, open-source framework called AxoNN. We describe several performance optimizations in AxoNN to improve matrix multiply kernel performance, overlap non-blocking collectives with computation, and performance modeling to choose performance optimal configurations. These have resulted in unprecedented scaling and peak flop/s (bf16) for training of GPT-style transformer models on Perlmutter (620.1 Petaflop/s), Frontier (1.381 Exaflop/s) and Alps (1.423 Exaflop/s). 
While the abilities of LLMs improve with the number of trainable parameters, so do privacy and copyright risks caused by memorization of training data, which can cause disclosure of sensitive or private information at inference time. We highlight this side effect of scale through experiments that explore "catastrophic memorization", where models are sufficiently large to memorize training data in a single pass, and present an approach to prevent it. As part of this study, we demonstrate fine-tuning of a 405-billion parameter LLM using AxoNN on Frontier. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08145v1-abstract-full').style.display = 'none'; document.getElementById('2502.08145v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06766">arXiv:2502.06766</a> <span> [<a href="https://arxiv.org/pdf/2502.06766">pdf</a>, <a href="https://arxiv.org/format/2502.06766">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Sparsity for Long Context Inference: Million Token Contexts on Commodity GPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Synk%2C+R">Ryan Synk</a>, <a href="/search/?searchtype=author&query=Hoover%2C+M">Monte Hoover</a>, <a href="/search/?searchtype=author&query=Kirchenbauer%2C+J">John Kirchenbauer</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Stein%2C+A">Alex Stein</a>, <a href="/search/?searchtype=author&query=Shu%2C+M">Manli Shu</a>, <a href="/search/?searchtype=author&query=Sanchez%2C+J+M">Josue Melendez Sanchez</a>, <a href="/search/?searchtype=author&query=Duraiswami%2C+R">Ramani Duraiswami</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06766v2-abstract-short" style="display: inline;"> There is growing demand for performing inference with hundreds of thousands of input tokens on trained transformer models. Inference at this extreme scale demands significant computational resources, hindering the application of transformers at long contexts on commodity (i.e not data center scale) hardware. 
To address the inference time costs associated with running self-attention based transform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06766v2-abstract-full').style.display = 'inline'; document.getElementById('2502.06766v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06766v2-abstract-full" style="display: none;"> There is growing demand for performing inference with hundreds of thousands of input tokens on trained transformer models. Inference at this extreme scale demands significant computational resources, hindering the application of transformers at long contexts on commodity (i.e not data center scale) hardware. To address the inference time costs associated with running self-attention based transformer language models on long contexts and enable their adoption on widely available hardware, we propose a tunable mechanism that reduces the cost of the forward pass by attending to only the most relevant tokens at every generation step using a top-k selection mechanism. We showcase the efficiency gains afforded by our method by performing inference on context windows up to 1M tokens using approximately 16GB of GPU RAM. Our experiments reveal that models are capable of handling the sparsity induced by the reduced number of keys and values. By attending to less than 2% of input tokens, we achieve over 95% of model performance on common benchmarks (RULER, AlpacaEval, and Open LLM Leaderboard). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06766v2-abstract-full').style.display = 'none'; document.getElementById('2502.06766v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 9 figures, 2 tables in main body</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05171">arXiv:2502.05171</a> <span> [<a href="https://arxiv.org/pdf/2502.05171">pdf</a>, <a href="https://arxiv.org/format/2502.05171">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Scaling up Test-Time Compute with Latent Reasoning: A Recurrent Depth Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Geiping%2C+J">Jonas Geiping</a>, <a href="/search/?searchtype=author&query=McLeish%2C+S">Sean McLeish</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Kirchenbauer%2C+J">John Kirchenbauer</a>, <a href="/search/?searchtype=author&query=Singh%2C+S">Siddharth Singh</a>, <a href="/search/?searchtype=author&query=Bartoldson%2C+B+R">Brian R. 
Bartoldson</a>, <a href="/search/?searchtype=author&query=Kailkhura%2C+B">Bhavya Kailkhura</a>, <a href="/search/?searchtype=author&query=Bhatele%2C+A">Abhinav Bhatele</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05171v2-abstract-short" style="display: inline;"> We study a novel language model architecture that is capable of scaling test-time computation by implicitly reasoning in latent space. Our model works by iterating a recurrent block, thereby unrolling to arbitrary depth at test-time. This stands in contrast to mainstream reasoning models that scale up compute by producing more tokens. Unlike approaches based on chain-of-thought, our approach does… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05171v2-abstract-full').style.display = 'inline'; document.getElementById('2502.05171v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05171v2-abstract-full" style="display: none;"> We study a novel language model architecture that is capable of scaling test-time computation by implicitly reasoning in latent space. Our model works by iterating a recurrent block, thereby unrolling to arbitrary depth at test-time. This stands in contrast to mainstream reasoning models that scale up compute by producing more tokens. Unlike approaches based on chain-of-thought, our approach does not require any specialized training data, can work with small context windows, and can capture types of reasoning that are not easily represented in words. We scale a proof-of-concept model to 3.5 billion parameters and 800 billion tokens. 
We show that the resulting model can improve its performance on reasoning benchmarks, sometimes dramatically, up to a computation load equivalent to 50 billion parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05171v2-abstract-full').style.display = 'none'; document.getElementById('2502.05171v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The model is available at https://huggingface.co/tomg-group-umd/huginn-0125. Code and data recipe can be found at https://github.com/seal-rg/recurrent-pretraining</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17088">arXiv:2501.17088</a> <span> [<a href="https://arxiv.org/pdf/2501.17088">pdf</a>, <a href="https://arxiv.org/format/2501.17088">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Mamba-Shedder: Post-Transformer Compression for Efficient Selective Structured State Space Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Mu%C3%B1oz%2C+J+P">J. 
Pablo Muñoz</a>, <a href="/search/?searchtype=author&query=Yuan%2C+J">Jinjie Yuan</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17088v1-abstract-short" style="display: inline;"> Large pre-trained models have achieved outstanding results in sequence modeling. The Transformer block and its attention mechanism have been the main drivers of the success of these models. Recently, alternative architectures, such as Selective Structured State Space Models (SSMs), have been proposed to address the inefficiencies of Transformers. This paper explores the compression of SSM-based mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17088v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17088v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17088v1-abstract-full" style="display: none;"> Large pre-trained models have achieved outstanding results in sequence modeling. The Transformer block and its attention mechanism have been the main drivers of the success of these models. Recently, alternative architectures, such as Selective Structured State Space Models (SSMs), have been proposed to address the inefficiencies of Transformers. This paper explores the compression of SSM-based models, particularly Mamba and its hybrids. We study the sensitivity of these models to the removal of selected components at different granularities to reduce the model size and computational overhead, thus improving their efficiency while maintaining accuracy. 
The proposed solutions, collectively referred to as Mamba-Shedder, achieve a speedup of up to 1.4x during inference, demonstrating that model efficiency can be improved by eliminating several redundancies with minimal impact on the overall model performance. The code is available at https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17088v1-abstract-full').style.display = 'none'; document.getElementById('2501.17088v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL-25 - Main track</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16372">arXiv:2501.16372</a> <span> [<a href="https://arxiv.org/pdf/2501.16372">pdf</a>, <a href="https://arxiv.org/format/2501.16372">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Low-Rank Adapters Meet Neural Architecture Search for LLM Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/?searchtype=author&query=Mu%C3%B1oz%2C+J+P">J. Pablo Muñoz</a>, <a href="/search/?searchtype=author&query=Yuan%2C+J">Jinjie Yuan</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16372v1-abstract-short" style="display: inline;"> The rapid expansion of Large Language Models (LLMs) has posed significant challenges regarding the computational resources required for fine-tuning and deployment. Recent advancements in low-rank adapters have demonstrated their efficacy in parameter-efficient fine-tuning (PEFT) of these models. This retrospective paper comprehensively discusses innovative approaches that synergize low-rank repres… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16372v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16372v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16372v1-abstract-full" style="display: none;"> The rapid expansion of Large Language Models (LLMs) has posed significant challenges regarding the computational resources required for fine-tuning and deployment. Recent advancements in low-rank adapters have demonstrated their efficacy in parameter-efficient fine-tuning (PEFT) of these models. This retrospective paper comprehensively discusses innovative approaches that synergize low-rank representations with Neural Architecture Search (NAS) techniques, particularly weight-sharing super-networks. Robust solutions for compressing and fine-tuning large pre-trained models are developed by integrating these methodologies. Our analysis highlights the potential of these combined strategies to democratize the use of LLMs, making them more accessible for deployment in resource-constrained environments. 
The resulting models exhibit reduced memory footprints and faster inference times, paving the way for more practical and scalable applications of LLMs. Models and code are available at https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16372v1-abstract-full').style.display = 'none'; document.getElementById('2501.16372v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI-25 Workshop on Connecting Low-rank Representations in AI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15722">arXiv:2501.15722</a> <span> [<a href="https://arxiv.org/pdf/2501.15722">pdf</a>, <a href="https://arxiv.org/format/2501.15722">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> INRet: A General Framework for Accurate Retrieval of INRs for Shapes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Guan%2C+Y">Yushi Guan</a>, <a href="/search/?searchtype=author&query=Kwan%2C+D">Daniel Kwan</a>, <a href="/search/?searchtype=author&query=Liang%2C+R">Ruofan Liang</a>, <a href="/search/?searchtype=author&query=Panneer%2C+S">Selvakumar Panneer</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a>, <a href="/search/?searchtype=author&query=Ahuja%2C+N">Nilesh Ahuja</a>, <a 
href="/search/?searchtype=author&query=Vijaykumar%2C+N">Nandita Vijaykumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15722v1-abstract-short" style="display: inline;"> Implicit neural representations (INRs) have become an important method for encoding various data types, such as 3D objects or scenes, images, and videos. They have proven to be particularly effective at representing 3D content, e.g., 3D scene reconstruction from 2D images, novel 3D content creation, as well as the representation, interpolation, and completion of 3D shapes. With the widespread gene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15722v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15722v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15722v1-abstract-full" style="display: none;"> Implicit neural representations (INRs) have become an important method for encoding various data types, such as 3D objects or scenes, images, and videos. They have proven to be particularly effective at representing 3D content, e.g., 3D scene reconstruction from 2D images, novel 3D content creation, as well as the representation, interpolation, and completion of 3D shapes. With the widespread generation of 3D data in an INR format, there is a need to support effective organization and retrieval of INRs saved in a data store. A key aspect of retrieval and clustering of INRs in a data store is the formulation of similarity between INRs that would, for example, enable retrieval of similar INRs using a query INR. In this work, we propose INRet, a method for determining similarity between INRs that represent shapes, thus enabling accurate retrieval of similar shape INRs from an INR data store. 
INRet flexibly supports different INR architectures such as INRs with octree grids, triplanes, and hash grids, as well as different implicit functions including signed/unsigned distance function and occupancy field. We demonstrate that our method is more general and accurate than the existing INR retrieval method, which only supports simple MLP INRs and requires the same architecture between the query and stored INRs. Furthermore, compared to converting INRs to other representations (e.g., point clouds or multi-view images) for 3D shape retrieval, INRet achieves higher accuracy while avoiding the conversion overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15722v1-abstract-full').style.display = 'none'; document.getElementById('2501.15722v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3DV 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13165">arXiv:2501.13165</a> <span> [<a href="https://arxiv.org/pdf/2501.13165">pdf</a>, <a href="https://arxiv.org/format/2501.13165">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> QuFeX: Quantum feature extraction module for hybrid quantum-classical deep neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Kalev%2C+A">Amir Kalev</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13165v1-abstract-short" style="display: inline;"> We introduce Quantum Feature Extraction (QuFeX), a novel quantum machine learning module. The proposed module enables feature extraction in a reduced-dimensional space, significantly decreasing the number of parallel evaluations required in typical quantum convolutional neural network architectures. 
Its design allows seamless integration into deep classical neural networks, making it particularly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13165v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13165v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13165v1-abstract-full" style="display: none;"> We introduce Quantum Feature Extraction (QuFeX), a novel quantum machine learning module. The proposed module enables feature extraction in a reduced-dimensional space, significantly decreasing the number of parallel evaluations required in typical quantum convolutional neural network architectures. Its design allows seamless integration into deep classical neural networks, making it particularly suitable for hybrid quantum-classical models. As an application of QuFeX, we propose Qu-Net -- a hybrid architecture which integrates QuFeX at the bottleneck of a U-Net architecture. The latter is widely used for image segmentation tasks such as medical imaging and autonomous driving. Our numerical analysis indicates that the Qu-Net can achieve superior segmentation performance compared to a U-Net baseline. These results highlight the potential of QuFeX to enhance deep neural networks by leveraging hybrid computational paradigms, providing a path towards a robust framework for real-world applications requiring precise feature extraction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13165v1-abstract-full').style.display = 'none'; document.getElementById('2501.13165v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09949">arXiv:2501.09949</a> <span> [<a href="https://arxiv.org/pdf/2501.09949">pdf</a>, <a href="https://arxiv.org/format/2501.09949">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MultiPruner: Balanced Structure Removal in Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Mu%C3%B1oz%2C+J+P">J. Pablo Muñoz</a>, <a href="/search/?searchtype=author&query=Yuan%2C+J">Jinjie Yuan</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09949v1-abstract-short" style="display: inline;"> Recently, state-of-the-art approaches for pruning large pre-trained models (LPMs) have demonstrated that the training-free removal of non-critical residual blocks in Transformers is viable for reducing model size, achieving results that outperform previous training-free pruning approaches. 
Motivated by these findings, we extend BlockPruner (Zhong et al., 2024) and propose MultiPruner, a pruning ap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09949v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09949v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09949v1-abstract-full" style="display: none;"> Recently, state-of-the-art approaches for pruning large pre-trained models (LPMs) have demonstrated that the training-free removal of non-critical residual blocks in Transformers is viable for reducing model size, achieving results that outperform previous training-free pruning approaches. Motivated by these findings, we extend BlockPruner (Zhong et al., 2024) and propose MultiPruner, a pruning approach that surpasses recent training-free pruning methods by adopting a multidimensional, iterative, fine-grained pruning strategy. In MultiPruner, multidimensional pruning reinstates the structural balance in block-pruned models by sequentially compressing along three dimensions: i) residual blocks, ii) channels of multilayer perceptrons (MLP), and iii) attention heads. This solution enhances zero-shot accuracy on downstream tasks compared to other techniques while improving model compression ratios, producing compressed models with fewer computing and memory requirements. Extensive experiments demonstrate the advantages of the proposed method across various large pre-trained models. The code and pruning configurations are available at https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09949v1-abstract-full').style.display = 'none'; document.getElementById('2501.09949v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09029">arXiv:2501.09029</a> <span> [<a href="https://arxiv.org/pdf/2501.09029">pdf</a>, <a href="https://arxiv.org/format/2501.09029">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Data Integrity through Provenance Tracking in Semantic Web Frameworks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09029v1-abstract-short" style="display: inline;"> This paper explores the integration of provenance tracking systems within the context of Semantic Web technologies to enhance data integrity in diverse operational environments. 
SURROUND Australia Pty Ltd demonstrates innovative applica-tions of the PROV Data Model (PROV-DM) and its Semantic Web variant, PROV-O, to systematically record and manage provenance information across multiple data proces… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09029v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09029v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09029v1-abstract-full" style="display: none;"> This paper explores the integration of provenance tracking systems within the context of Semantic Web technologies to enhance data integrity in diverse operational environments. SURROUND Australia Pty Ltd demonstrates innovative applica-tions of the PROV Data Model (PROV-DM) and its Semantic Web variant, PROV-O, to systematically record and manage provenance information across multiple data processing domains. By employing RDF and Knowledge Graphs, SURROUND ad-dresses the critical challenges of shared entity identification and provenance granularity. The paper highlights the company's architecture for capturing comprehensive provenance data, en-abling robust validation, traceability, and knowledge inference. Through the examination of two projects, we illustrate how provenance mechanisms not only improve data reliability but also facilitate seamless integration across heterogeneous systems. Our findings underscore the importance of sophisticated provenance solutions in maintaining data integrity, serving as a reference for industry peers and academics engaged in provenance research and implementation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09029v1-abstract-full').style.display = 'none'; document.getElementById('2501.09029v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This 10-page manuscript with 5 figures focuses on leveraging Semantic Web frameworks to enhance data integrity through provenance tracking. Intended for conference submission, it aligns with the cs.AI category, addressing knowledge representation, data modeling, and uncertainty in AI using advanced tools like PROV-DM and PROV-O</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T30; 68T35; 68P15: Covers knowledge representation; Semantic Web applications; and database theory for provenance tracking and data integrity </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08870">arXiv:2501.08870</a> <span> [<a href="https://arxiv.org/pdf/2501.08870">pdf</a>, <a href="https://arxiv.org/format/2501.08870">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking of Fluorescence Lifetime Measurements using Time-Frequency Correlated Photons </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/?searchtype=author&query=G%C3%A4bler%2C+T+B">Tobias Bernd Gäbler</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nitish Jain</a>, <a href="/search/?searchtype=author&query=Then%2C+P">Patrick Then</a>, <a href="/search/?searchtype=author&query=Eggeling%2C+C">Christian Eggeling</a>, <a href="/search/?searchtype=author&query=Gr%C3%A4fe%2C+M">Markus Gräfe</a>, <a href="/search/?searchtype=author&query=Gili%2C+V+F">Valerio Flavio Gili</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08870v2-abstract-short" style="display: inline;"> The investigation of fluorescence lifetime became an important tool in biology and medical science. So far, established methods of fluorescence lifetime measurements require the illumination of the investigated probes with pulsed or amplitude-modulated light. In this paper, we examine the limitations of an innovative method of fluorescence lifetime using the strong time-frequency correlation of en… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08870v2-abstract-full').style.display = 'inline'; document.getElementById('2501.08870v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08870v2-abstract-full" style="display: none;"> The investigation of fluorescence lifetime became an important tool in biology and medical science. So far, established methods of fluorescence lifetime measurements require the illumination of the investigated probes with pulsed or amplitude-modulated light. In this paper, we examine the limitations of an innovative method of fluorescence lifetime using the strong time-frequency correlation of entangled photons generated by a continuous-wave source. 
For this purpose, we investigate the lifetime of IR-140 to demonstrate the functional principle and its dependencies on different experimental parameters. We also compare this technique with state-of-the-art FLIM and observed an improved figure-of-merit. Finally, we discuss the potential of a quantum advantage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08870v2-abstract-full').style.display = 'none'; document.getElementById('2501.08870v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14234">arXiv:2412.14234</a> <span> [<a href="https://arxiv.org/pdf/2412.14234">pdf</a>, <a href="https://arxiv.org/format/2412.14234">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Syzygy: Dual Code-Test C to (safe) Rust Translation using LLMs and Dynamic Analysis </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Shetty%2C+M">Manish Shetty</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Godbole%2C+A">Adwait Godbole</a>, <a href="/search/?searchtype=author&query=Seshia%2C+S+A">Sanjit A. Seshia</a>, <a href="/search/?searchtype=author&query=Sen%2C+K">Koushik Sen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14234v2-abstract-short" style="display: inline;"> Despite extensive usage in high-performance, low-level systems programming applications, C is susceptible to vulnerabilities due to manual memory management and unsafe pointer operations. Rust, a modern systems programming language, offers a compelling alternative. Its unique ownership model and type system ensure memory safety without sacrificing performance. In this paper, we present Syzygy, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14234v2-abstract-full').style.display = 'inline'; document.getElementById('2412.14234v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14234v2-abstract-full" style="display: none;"> Despite extensive usage in high-performance, low-level systems programming applications, C is susceptible to vulnerabilities due to manual memory management and unsafe pointer operations. Rust, a modern systems programming language, offers a compelling alternative. Its unique ownership model and type system ensure memory safety without sacrificing performance. In this paper, we present Syzygy, an automated approach to translate C to safe Rust. Our technique uses a synergistic combination of LLM-driven code and test translation guided by dynamic-analysis-generated execution information. 
This paired translation runs incrementally in a loop over the program in dependency order of the code elements while maintaining per-step correctness. Our approach exposes novel insights on combining the strengths of LLMs and dynamic analysis in the context of scaling and combining code generation with testing. We apply our approach to successfully translate Zopfli, a high-performance compression library with ~3000 lines of code and 98 functions. We validate the translation by testing equivalence with the source C program on a set of inputs. To our knowledge, this is the largest automated and test-validated C to safe Rust code translation achieved so far. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14234v2-abstract-full').style.display = 'none'; document.getElementById('2412.14234v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project webpage at https://syzygy-project.github.io/. 
Preliminary version accepted at LLM4Code 2025, 34 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2; D.2; D.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07174">arXiv:2412.07174</a> <span> [<a href="https://arxiv.org/pdf/2412.07174">pdf</a>, <a href="https://arxiv.org/format/2412.07174">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Post-Training Statistical Calibration for Higher Activation Sparsity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chua%2C+V+S">Vui Seng Chua</a>, <a href="/search/?searchtype=author&query=Pan%2C+Y">Yujie Pan</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07174v1-abstract-short" style="display: inline;"> We present Statistical Calibrated Activation Pruning (SCAP), a post-training activation pruning framework that (1) generalizes sparsification by input activations of Fully-Connected layers for generic and flexible application across Transformers, and (2) features a simple Mode-Centering technique to pre-calibrate activation distributions for maximizing post-training sparsity. 
Our results demonstra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07174v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07174v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07174v1-abstract-full" style="display: none;"> We present Statistical Calibrated Activation Pruning (SCAP), a post-training activation pruning framework that (1) generalizes sparsification by input activations of Fully-Connected layers for generic and flexible application across Transformers, and (2) features a simple Mode-Centering technique to pre-calibrate activation distributions for maximizing post-training sparsity. Our results demonstrate robust Pareto efficiency compared to prior methods, translating to a 1.5x additional LLM decoding speedup against CATS at iso model quality. SCAP effectiveness is empirically verified across a wide range of models, including recent Transformer Decoders, MoE, Mamba2, Encoding Transformer, and pre-quantized models, highlighting its practicality and scalability. The code is available at: https://github.com/IntelLabs/SCAP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07174v1-abstract-full').style.display = 'none'; document.getElementById('2412.07174v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ENLSP-IV NeurIPS Workshop 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06748">arXiv:2412.06748</a> <span> [<a href="https://arxiv.org/pdf/2412.06748">pdf</a>, <a href="https://arxiv.org/format/2412.06748">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Refusal Tokens: A Simple Way to Calibrate Refusals in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Shrivastava%2C+A">Aditya Shrivastava</a>, <a href="/search/?searchtype=author&query=Zhu%2C+C">Chenyang Zhu</a>, <a href="/search/?searchtype=author&query=Liu%2C+D">Daben Liu</a>, <a href="/search/?searchtype=author&query=Samuel%2C+A">Alfy Samuel</a>, <a href="/search/?searchtype=author&query=Panda%2C+A">Ashwinee Panda</a>, <a href="/search/?searchtype=author&query=Kumar%2C+A">Anoop Kumar</a>, <a href="/search/?searchtype=author&query=Goldblum%2C+M">Micah Goldblum</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06748v1-abstract-short" style="display: inline;"> A key component of building safe and reliable language models is enabling the models to appropriately refuse to follow certain instructions or answer certain questions. 
We may want models to output refusal messages for various categories of user queries, for example, ill-posed questions, instructions for committing illegal acts, or queries which require information past the model's knowledge horiz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06748v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06748v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06748v1-abstract-full" style="display: none;"> A key component of building safe and reliable language models is enabling the models to appropriately refuse to follow certain instructions or answer certain questions. We may want models to output refusal messages for various categories of user queries, for example, ill-posed questions, instructions for committing illegal acts, or queries which require information past the model's knowledge horizon. Engineering models that refuse to answer such questions is complicated by the fact that an individual may want their model to exhibit varying levels of sensitivity for refusing queries of various categories, and different users may want different refusal rates. The current default approach involves training multiple models with varying proportions of refusal messages from each category to achieve the desired refusal rates, which is computationally expensive and may require training a new model to accommodate each user's desired preference over refusal rates. To address these challenges, we propose refusal tokens, one such token for each refusal category or a single refusal token, which are prepended to the model's responses during training. We then show how to increase or decrease the probability of generating the refusal token for each category during inference to steer the model's refusal behavior. 
Refusal tokens enable controlling a single model's refusal rates without the need of any further fine-tuning, but only by selectively intervening during generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06748v1-abstract-full').style.display = 'none'; document.getElementById('2412.06748v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.04478">arXiv:2412.04478</a> <span> [<a href="https://arxiv.org/pdf/2412.04478">pdf</a>, <a href="https://arxiv.org/format/2412.04478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LibEvolutionEval: A Benchmark and Study for Version-Specific Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Kuhar%2C+S">Sachit Kuhar</a>, <a href="/search/?searchtype=author&query=Ahmad%2C+W+U">Wasi Uddin Ahmad</a>, <a href="/search/?searchtype=author&query=Wang%2C+Z">Zijian Wang</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nihal Jain</a>, <a href="/search/?searchtype=author&query=Qian%2C+H">Haifeng Qian</a>, <a href="/search/?searchtype=author&query=Ray%2C+B">Baishakhi Ray</a>, <a 
href="/search/?searchtype=author&query=Ramanathan%2C+M+K">Murali Krishna Ramanathan</a>, <a href="/search/?searchtype=author&query=Ma%2C+X">Xiaofei Ma</a>, <a href="/search/?searchtype=author&query=Deoras%2C+A">Anoop Deoras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.04478v1-abstract-short" style="display: inline;"> Recent advancements in code completion models have primarily focused on local file contexts. However, these studies do not fully capture the complexity of real-world software development, which often requires the use of rapidly-evolving public libraries. To fill the gap, we introduce LibEvolutionEval, a detailed study requiring an understanding of library evolution to perform in-line code completi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04478v1-abstract-full').style.display = 'inline'; document.getElementById('2412.04478v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.04478v1-abstract-full" style="display: none;"> Recent advancements in code completion models have primarily focused on local file contexts. However, these studies do not fully capture the complexity of real-world software development, which often requires the use of rapidly-evolving public libraries. To fill the gap, we introduce LibEvolutionEval, a detailed study requiring an understanding of library evolution to perform in-line code completion accurately. LibEvolutionEval provides a version-specific code-completion task comprised of eight libraries (torch, torchvision, scipy, pil, tqdm, pyyaml, matplotlib, and pandas) as they evolve over the year along with a detailed analysis of the evolution of two popular and well-maintained public libraries: PyTorch and Matplotlib. 
We evaluate popular public models and find that public library evolution significantly influences model performance. We explored mitigation methods by studying how retrieved version-specific library documentation and prompting can improve the model's capability in handling these fast-evolving packages, paving a promising future path in better handling fast-evolving libraries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04478v1-abstract-full').style.display = 'none'; document.getElementById('2412.04478v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.02889">arXiv:2412.02889</a> <span> [<a href="https://arxiv.org/pdf/2412.02889">pdf</a>, <a href="https://arxiv.org/format/2412.02889">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Deep-Learning Based Docking Methods: Fair Comparisons to Conventional Docking Workflows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+A+N">Ajay N. Jain</a>, <a href="/search/?searchtype=author&query=Cleves%2C+A+E">Ann E. Cleves</a>, <a href="/search/?searchtype=author&query=Walters%2C+W+P">W. 
Patrick Walters</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.02889v2-abstract-short" style="display: inline;"> The diffusion learning method, DiffDock, for docking small-molecule ligands into protein binding sites was recently introduced. Results included comparisons to more conventional docking approaches, with DiffDock showing superior performance. Here, we employ a fully automatic workflow using the Surflex-Dock methods to generate a fair baseline for conventional docking approaches. Results were genera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02889v2-abstract-full').style.display = 'inline'; document.getElementById('2412.02889v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.02889v2-abstract-full" style="display: none;"> The diffusion learning method, DiffDock, for docking small-molecule ligands into protein binding sites was recently introduced. Results included comparisons to more conventional docking approaches, with DiffDock showing superior performance. Here, we employ a fully automatic workflow using the Surflex-Dock methods to generate a fair baseline for conventional docking approaches. Results were generated for the common and expected situation where a binding site location is known and also for the condition of an unknown binding site. For the known binding site condition, Surflex-Dock success rates at 2.0 Angstroms RMSD far exceeded those for DiffDock (Top-1/Top-5 success rates, respectively, were 68/81% compared with 45/51%). Glide performed with similar success rates (67/73%) to Surflex-Dock for the known binding site condition, and results for AutoDock Vina and Gnina followed this pattern. 
For the unknown binding site condition, using an automated method to identify multiple binding pockets, Surflex-Dock success rates again exceeded those of DiffDock, but by a somewhat lesser margin. DiffDock made use of roughly 17,000 co-crystal structures for learning (98% of PDBBind version 2020, pre-2019 structures) for a training set in order to predict on 363 test cases (2% of PDBBind 2020) from 2019 forward. DiffDock's performance was inextricably linked with the presence of near-neighbor cases of close to identical protein-ligand complexes in the training set for over half of the test set cases. DiffDock exhibited a 40 percentage point difference on near-neighbor cases (two-thirds of all test cases) compared with cases with no near-neighbor training case. DiffDock has apparently encoded a type of table-lookup during its learning process, rendering meaningful applications beyond its reach. Further, it does not perform even close to competitively with a competently run modern docking workflow. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02889v2-abstract-full').style.display = 'none'; document.getElementById('2412.02889v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Post-Conclusion addendum added with additional reference and context, 19 pages including references and appendices, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15929">arXiv:2411.15929</a> <span> [<a href="https://arxiv.org/pdf/2411.15929">pdf</a>, <a href="https://arxiv.org/format/2411.15929">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Nonlinear Model Predictive Control of a Hybrid Thermal Management System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gulewicz%2C+D">Demetrius Gulewicz</a>, <a href="/search/?searchtype=author&query=Inyang-Udoh%2C+U">Uduak Inyang-Udoh</a>, <a href="/search/?searchtype=author&query=Bird%2C+T">Trevor Bird</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neera Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15929v1-abstract-short" style="display: inline;"> Model predictive control has gained popularity for its ability to satisfy constraints and guarantee robustness for certain classes of systems. However, for systems whose dynamics are characterized by a high state dimension, substantial nonlinearities, and stiffness, suitable methods for online nonlinear MPC are lacking. 
One example of such a system is a vehicle thermal management system (TMS) with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15929v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15929v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15929v1-abstract-full" style="display: none;"> Model predictive control has gained popularity for its ability to satisfy constraints and guarantee robustness for certain classes of systems. However, for systems whose dynamics are characterized by a high state dimension, substantial nonlinearities, and stiffness, suitable methods for online nonlinear MPC are lacking. One example of such a system is a vehicle thermal management system (TMS) with integrated thermal energy storage (TES), also referred to as a hybrid TMS. Here, hybrid refers to the ability to achieve cooling through a conventional heat exchanger or via melting of a phase change material, or both. Given increased electrification in vehicle platforms, more stringent performance specifications are being placed on TMS, in turn requiring more advanced control methods. In this paper, we present the design and real-time implementation of a nonlinear model predictive controller with 77 states on an experimental hybrid TMS testbed. We show how, in spite of high-dimension and stiff dynamics, an explicit integration method can be obtained by linearizing the dynamics at each time step within the MPC horizon. This integration method further allows the first-order gradients to be calculated with minimal additional computational cost. Through simulated and experimental results, we demonstrate the utility of the proposed solution method and the benefits of TES for mitigating highly transient heat loads achieved by actively controlling its charging and discharging behavior. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15929v1-abstract-full').style.display = 'none'; document.getElementById('2411.15929v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 14 figures, submitted to IEEE Transactions on Control Systems Technology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04442">arXiv:2411.04442</a> <span> [<a href="https://arxiv.org/pdf/2411.04442">pdf</a>, <a href="https://arxiv.org/format/2411.04442">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Single-Qubit Gates on a Noise-Biased Qubit Beyond the Fault-Tolerant Threshold </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Qing%2C+B">Bingcheng Qing</a>, <a href="/search/?searchtype=author&query=Hajr%2C+A">Ahmed Hajr</a>, <a href="/search/?searchtype=author&query=Wang%2C+K">Ke Wang</a>, <a href="/search/?searchtype=author&query=Koolstra%2C+G">Gerwin Koolstra</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+L+B">Long B. 
Nguyen</a>, <a href="/search/?searchtype=author&query=Hines%2C+J">Jordan Hines</a>, <a href="/search/?searchtype=author&query=Huang%2C+I">Irwin Huang</a>, <a href="/search/?searchtype=author&query=Bhandari%2C+B">Bibek Bhandari</a>, <a href="/search/?searchtype=author&query=Padramrazi%2C+Z">Zahra Padramrazi</a>, <a href="/search/?searchtype=author&query=Chen%2C+L">Larry Chen</a>, <a href="/search/?searchtype=author&query=Kang%2C+Z">Ziqi Kang</a>, <a href="/search/?searchtype=author&query=J%C3%BCnger%2C+C">Christian Jünger</a>, <a href="/search/?searchtype=author&query=Goss%2C+N">Noah Goss</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nikitha Jain</a>, <a href="/search/?searchtype=author&query=Kim%2C+H">Hyunseong Kim</a>, <a href="/search/?searchtype=author&query=Lee%2C+K">Kan-Heng Lee</a>, <a href="/search/?searchtype=author&query=Hashim%2C+A">Akel Hashim</a>, <a href="/search/?searchtype=author&query=Frattini%2C+N+E">Nicholas E. Frattini</a>, <a href="/search/?searchtype=author&query=Dressel%2C+J">Justin Dressel</a>, <a href="/search/?searchtype=author&query=Jordan%2C+A+N">Andrew N. Jordan</a>, <a href="/search/?searchtype=author&query=Santiago%2C+D+I">David I. Santiago</a>, <a href="/search/?searchtype=author&query=Siddiqi%2C+I">Irfan Siddiqi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04442v1-abstract-short" style="display: inline;"> The ubiquitous noise in quantum system hinders the advancement of quantum information processing and has driven the emergence of different hardware-efficient quantum error correction protocols. 
Among them, qubits with structured noise, especially with biased noise, are one of the most promising platform to achieve fault-tolerance due to the high error thresholds of quantum error correction codes t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04442v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04442v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04442v1-abstract-full" style="display: none;"> The ubiquitous noise in quantum system hinders the advancement of quantum information processing and has driven the emergence of different hardware-efficient quantum error correction protocols. Among them, qubits with structured noise, especially with biased noise, are one of the most promising platform to achieve fault-tolerance due to the high error thresholds of quantum error correction codes tailored for them. Nevertheless, their quantum operations are challenging and the demonstration of their performance beyond the fault-tolerant threshold remain incomplete. Here, we leverage Schrödinger cat states in a scalable planar superconducting nonlinear oscillator to thoroughly characterize the high-fidelity single-qubit quantum operations with systematic quantum tomography and benchmarking tools, demonstrating the state-of-the-art performance of operations crossing the fault-tolerant threshold of the XZZX surface code. These results thus embody a transformative milestone in the exploration of quantum systems with structured error channels. Notably, our framework is extensible to other types of structured-noise systems, paving the way for systematic characterization and validation of novel quantum platforms with structured noise. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04442v1-abstract-full').style.display = 'none'; document.getElementById('2411.04442v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02870">arXiv:2411.02870</a> <span> [<a href="https://arxiv.org/pdf/2411.02870">pdf</a>, <a href="https://arxiv.org/format/2411.02870">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Astrophysical Phenomena">astro-ph.HE</span> </div> </div> <p class="title is-5 mathjax"> Morphology of 32 Repeating Fast Radio Burst Sources at Microsecond Time Scales with CHIME/FRB </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Curtin%2C+A+P">Alice P. Curtin</a>, <a href="/search/?searchtype=author&query=Sand%2C+K+R">Ketan R. 
Sand</a>, <a href="/search/?searchtype=author&query=Pleunis%2C+Z">Ziggy Pleunis</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Kaspi%2C+V">Victoria Kaspi</a>, <a href="/search/?searchtype=author&query=Michilli%2C+D">Daniele Michilli</a>, <a href="/search/?searchtype=author&query=Fonseca%2C+E">Emmanuel Fonseca</a>, <a href="/search/?searchtype=author&query=Shin%2C+K">Kaitlyn Shin</a>, <a href="/search/?searchtype=author&query=Nimmo%2C+K">Kenzie Nimmo</a>, <a href="/search/?searchtype=author&query=Brar%2C+C">Charanjot Brar</a>, <a href="/search/?searchtype=author&query=Dong%2C+F+A">Fengqiu Adam Dong</a>, <a href="/search/?searchtype=author&query=Eadie%2C+G+M">Gwendolyn M. Eadie</a>, <a href="/search/?searchtype=author&query=Gaensler%2C+B+M">B. M. Gaensler</a>, <a href="/search/?searchtype=author&query=Herrera-Martin%2C+A">Antonio Herrera-Martin</a>, <a href="/search/?searchtype=author&query=Ibik%2C+A+L">Adaeze L. Ibik</a>, <a href="/search/?searchtype=author&query=Joseph%2C+R+C">Ronny C. Joseph</a>, <a href="/search/?searchtype=author&query=Kaczmarek%2C+J">Jane Kaczmarek</a>, <a href="/search/?searchtype=author&query=Leung%2C+C">Calvin Leung</a>, <a href="/search/?searchtype=author&query=Main%2C+R">Robert Main</a>, <a href="/search/?searchtype=author&query=Masui%2C+K+W">Kiyoshi W. Masui</a>, <a href="/search/?searchtype=author&query=McKinven%2C+R">Ryan McKinven</a>, <a href="/search/?searchtype=author&query=Mena-Parra%2C+J">Juan Mena-Parra</a>, <a href="/search/?searchtype=author&query=Ng%2C+C">Cherry Ng</a>, <a href="/search/?searchtype=author&query=Pandhi%2C+A">Ayush Pandhi</a>, <a href="/search/?searchtype=author&query=Pearlman%2C+A+B">Aaron B. Pearlman</a> , et al. 
(5 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02870v1-abstract-short" style="display: inline;"> The Canadian Hydrogen Intensity Mapping Experiment Fast Radio Burst (CHIME/FRB) project has discovered the most repeating fast radio burst (FRB) sources of any telescope. However, most of the physical conclusions derived from this sample are based on data with a time resolution of $\sim$1 ms. In this work, we present for the first time a morphological analysis of the raw voltage data for 118 burst… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02870v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02870v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02870v1-abstract-full" style="display: none;"> The Canadian Hydrogen Intensity Mapping Experiment Fast Radio Burst (CHIME/FRB) project has discovered the most repeating fast radio burst (FRB) sources of any telescope. However, most of the physical conclusions derived from this sample are based on data with a time resolution of $\sim$1 ms. In this work, we present for the first time a morphological analysis of the raw voltage data for 118 bursts from 32 of CHIME/FRB's repeating sources. We do not find any significant correlations amongst fluence, dispersion measure (DM), burst rate, and burst duration. Performing the first large-scale morphological comparison at timescales down to microseconds between our repeating sources and 125 non-repeating FRBs, we find that repeaters are narrower in frequency and broader in duration than non-repeaters, supporting previous findings. 
However, we find that the duration-normalized sub-burst widths of the two populations are consistent, possibly suggesting a shared physical emission mechanism. Additionally, we find that the spectral fluences of the two are consistent. When combined with the larger bandwidths and previously found larger DMs of non-repeaters, this suggests that non-repeaters may have higher intrinsic specific energies than repeating FRBs. We do not find any consistent increase or decrease in the DM ($\lessapprox 1$ pc cm$^{-3}$ yr$^{-1}$) and scattering timescales ($\lessapprox 2$ ms yr$^{-1}$) of our sources over $\sim2-4$ year periods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02870v1-abstract-full').style.display = 'none'; document.getElementById('2411.02870v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 17 figures, 4 tables; Submitted to ApJ</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.24198">arXiv:2410.24198</a> <span> [<a href="https://arxiv.org/pdf/2410.24198">pdf</a>, <a href="https://arxiv.org/format/2410.24198">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> SelfCodeAlign: Self-Alignment for Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wei%2C+Y">Yuxiang Wei</a>, <a href="/search/?searchtype=author&query=Cassano%2C+F">Federico Cassano</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/?searchtype=author&query=Ding%2C+Y">Yifeng Ding</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Mueller%2C+Z">Zachary Mueller</a>, <a href="/search/?searchtype=author&query=de+Vries%2C+H">Harm de Vries</a>, <a href="/search/?searchtype=author&query=von+Werra%2C+L">Leandro von Werra</a>, <a href="/search/?searchtype=author&query=Guha%2C+A">Arjun Guha</a>, <a href="/search/?searchtype=author&query=Zhang%2C+L">Lingming Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.24198v2-abstract-short" style="display: inline;"> Instruction tuning is a 
supervised fine-tuning approach that significantly improves the ability of large language models (LLMs) to follow human instructions. We propose SelfCodeAlign, the first fully transparent and permissive pipeline for self-aligning code LLMs without extensive human annotations or distillation. SelfCodeAlign employs the same base model for inference throughout the data generat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24198v2-abstract-full').style.display = 'inline'; document.getElementById('2410.24198v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.24198v2-abstract-full" style="display: none;"> Instruction tuning is a supervised fine-tuning approach that significantly improves the ability of large language models (LLMs) to follow human instructions. We propose SelfCodeAlign, the first fully transparent and permissive pipeline for self-aligning code LLMs without extensive human annotations or distillation. SelfCodeAlign employs the same base model for inference throughout the data generation process. It first extracts diverse coding concepts from high-quality seed snippets to generate new tasks. It then samples multiple responses per task, pairs each with test cases, and validates them in a sandbox environment. Finally, passing examples are selected for instruction tuning. In our primary experiments, we use SelfCodeAlign with CodeQwen1.5-7B to generate a dataset of 74k instruction-response pairs. Finetuning on this dataset leads to a model that achieves a 67.1 pass@1 on HumanEval+, surpassing CodeLlama-70B-Instruct despite being ten times smaller. Across all benchmarks, this finetuned model consistently outperforms the original version trained with OctoPack, the previous state-of-the-art method for instruction tuning without human annotations or distillation. 
Additionally, we show that SelfCodeAlign is effective across LLMs of various sizes, from 3B to 33B, and that the base models can benefit more from alignment with their own data distribution. We further validate each component's effectiveness in our pipeline, showing that SelfCodeAlign outperforms both direct distillation from GPT-4o and leading GPT-3.5-based distillation methods, such as OSS-Instruct and Evol-Instruct. SelfCodeAlign has also led to the creation of StarCoder2-Instruct, the first fully transparent, permissively licensed, and self-aligned code LLM that achieves state-of-the-art coding performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24198v2-abstract-full').style.display = 'none'; document.getElementById('2410.24198v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23374">arXiv:2410.23374</a> <span> [<a href="https://arxiv.org/pdf/2410.23374">pdf</a>, <a href="https://arxiv.org/format/2410.23374">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Astrophysical Phenomena">astro-ph.HE</span> </div> </div> <p class="title is-5 mathjax"> A repeating fast radio burst source in the outskirts of a quiescent galaxy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Shah%2C+V">V. Shah</a>, <a href="/search/?searchtype=author&query=Shin%2C+K">K. Shin</a>, <a href="/search/?searchtype=author&query=Leung%2C+C">C. Leung</a>, <a href="/search/?searchtype=author&query=Fong%2C+W">W. Fong</a>, <a href="/search/?searchtype=author&query=Eftekhari%2C+T">T. Eftekhari</a>, <a href="/search/?searchtype=author&query=Amiri%2C+M">M. Amiri</a>, <a href="/search/?searchtype=author&query=Andersen%2C+B+C">B. C. Andersen</a>, <a href="/search/?searchtype=author&query=Andrew%2C+S">S. Andrew</a>, <a href="/search/?searchtype=author&query=Bhardwaj%2C+M">M. Bhardwaj</a>, <a href="/search/?searchtype=author&query=Brar%2C+C">C. Brar</a>, <a href="/search/?searchtype=author&query=Cassanelli%2C+T">T. Cassanelli</a>, <a href="/search/?searchtype=author&query=Chatterjee%2C+S">S. Chatterjee</a>, <a href="/search/?searchtype=author&query=Curtin%2C+A+P">A. P. Curtin</a>, <a href="/search/?searchtype=author&query=Dobbs%2C+M">M. Dobbs</a>, <a href="/search/?searchtype=author&query=Dong%2C+Y">Y. Dong</a>, <a href="/search/?searchtype=author&query=Dong%2C+F+A">F. A. 
Dong</a>, <a href="/search/?searchtype=author&query=Fonseca%2C+E">E. Fonseca</a>, <a href="/search/?searchtype=author&query=Gaensler%2C+B+M">B. M. Gaensler</a>, <a href="/search/?searchtype=author&query=Halpern%2C+M">M. Halpern</a>, <a href="/search/?searchtype=author&query=Hessels%2C+J+W+T">J. W. T. Hessels</a>, <a href="/search/?searchtype=author&query=Ibik%2C+A+L">A. L. Ibik</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">N. Jain</a>, <a href="/search/?searchtype=author&query=Joseph%2C+R+C">R. C. Joseph</a>, <a href="/search/?searchtype=author&query=Kaczmarek%2C+J">J. Kaczmarek</a>, <a href="/search/?searchtype=author&query=Kahinga%2C+L+A">L. A. Kahinga</a> , et al. (24 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23374v1-abstract-short" style="display: inline;"> We report the discovery of the repeating fast radio burst source FRB 20240209A using the CHIME/FRB telescope. We have detected 22 bursts from this repeater between February and July 2024, six of which were also recorded at the Outrigger station KKO. The 66-km long CHIME-KKO baseline can provide single-pulse FRB localizations along one dimension with $2^{\prime\prime}$ accuracy. The high declinatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23374v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23374v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23374v1-abstract-full" style="display: none;"> We report the discovery of the repeating fast radio burst source FRB 20240209A using the CHIME/FRB telescope. We have detected 22 bursts from this repeater between February and July 2024, six of which were also recorded at the Outrigger station KKO. 
The 66-km long CHIME-KKO baseline can provide single-pulse FRB localizations along one dimension with $2^{\prime\prime}$ accuracy. The high declination of $\sim$86 degrees for this repeater allowed its detection with a rotating range of baseline vectors, enabling the combined localization region size to be constrained to $1^{\prime\prime}\times2^{\prime\prime}$. We present deep Gemini observations that, combined with the FRB localization, enabled a robust association of FRB 20240209A to the outskirts of a luminous galaxy (P(O|x) = 0.99; $L \approx 5.3 \times 10^{10}\,L_{\odot}$). FRB 20240209A has a projected physical offset of $40 \pm 5$ kpc from the center of its host galaxy, making it the FRB with the largest host galaxy offset to date. When normalized by the host galaxy size, the offset of FRB 20240209A is comparable to that of FRB 20200120E, the only FRB source known to originate in a globular cluster. We consider several explanations for the large offset, including a progenitor that was kicked from the host galaxy or in situ formation in a low-luminosity satellite galaxy of the putative host, but find the most plausible scenario to be a globular cluster origin. This, coupled with the quiescent, elliptical nature of the host as demonstrated in our companion paper, provide strong evidence for a delayed formation channel for the progenitor of the FRB source. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23374v1-abstract-full').style.display = 'none'; document.getElementById('2410.23374v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to AAS Journals</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16173">arXiv:2410.16173</a> <span> [<a href="https://arxiv.org/pdf/2410.16173">pdf</a>, <a href="https://arxiv.org/format/2410.16173">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Fast Physics-Informed Model Predictive Control Approximation for Lyapunov Stability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Rivera%2C+J+N">Josue N. Rivera</a>, <a href="/search/?searchtype=author&query=Ruan%2C+J">Jianqi Ruan</a>, <a href="/search/?searchtype=author&query=Xu%2C+X">XiaoLin Xu</a>, <a href="/search/?searchtype=author&query=Yang%2C+S">Shuting Yang</a>, <a href="/search/?searchtype=author&query=Sun%2C+D">Dengfeng Sun</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neera Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16173v1-abstract-short" style="display: inline;"> At the forefront of control techniques is Model Predictive Control (MPC). While MPCs are effective, their requisite to recompute an optimal control given a new state leads to sparse response to the system and may make their implementation infeasible in small systems with low computational resources. 
To address these limitations in stability control, this research presents a small deterministic Phy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16173v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16173v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16173v1-abstract-full" style="display: none;"> At the forefront of control techniques is Model Predictive Control (MPC). While MPCs are effective, their requisite to recompute an optimal control given a new state leads to sparse response to the system and may make their implementation infeasible in small systems with low computational resources. To address these limitations in stability control, this research presents a small deterministic Physics-Informed MPC Surrogate model (PI-MPCS). PI-MPCS was developed to approximate the control by an MPC while encouraging stability and robustness through the integration of the system dynamics and the formation of a Lyapunov stability profile. Empirical results are presented on the task of 2D quadcopter landing. They demonstrate a rapid and precise MPC approximation on a non-linear system along with an estimated two times speed up on the computational requirements when compared against an MPC. PI-MPCS, in addition, displays a level of stable control for in- and out-of-distribution states as encouraged by the discrete dynamics residual and Lyapunov stability loss functions. PI-MPCS is meant to serve as a surrogate to MPC on situations in which the computational resources are limited. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16173v1-abstract-full').style.display = 'none'; document.getElementById('2410.16173v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15234">arXiv:2410.15234</a> <span> [<a href="https://arxiv.org/pdf/2410.15234">pdf</a>, <a href="https://arxiv.org/format/2410.15234">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Bias Amplification: Large Language Models as Increasingly Biased Media </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+Z">Ze Wang</a>, <a href="/search/?searchtype=author&query=Wu%2C+Z">Zekun Wu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Jeremy Zhang</a>, <a href="/search/?searchtype=author&query=Guan%2C+X">Xin Guan</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Navya Jain</a>, <a href="/search/?searchtype=author&query=Lu%2C+S">Skylar Lu</a>, <a href="/search/?searchtype=author&query=Gupta%2C+S">Saloni Gupta</a>, <a href="/search/?searchtype=author&query=Koshiyama%2C+A">Adriano Koshiyama</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15234v2-abstract-short" style="display: inline;"> Model collapse, a phenomenon where models degrade in performance due to indiscriminate use of synthetic data is well studied. 
However, its role in bias amplification, the progressive reinforcement of preexisting social biases in Large Language Models (LLMs) remains underexplored. In this paper, we formally define the conditions for bias amplification and demonstrate through statistical simulations… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15234v2-abstract-full').style.display = 'inline'; document.getElementById('2410.15234v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15234v2-abstract-full" style="display: none;"> Model collapse, a phenomenon where models degrade in performance due to indiscriminate use of synthetic data is well studied. However, its role in bias amplification, the progressive reinforcement of preexisting social biases in Large Language Models (LLMs) remains underexplored. In this paper, we formally define the conditions for bias amplification and demonstrate through statistical simulations that bias can intensify even in the absence of sampling errors, the primary driver of model collapse. Empirically, we investigate political bias amplification in GPT2 using a custom built benchmark for sentence continuation tasks. Our findings reveal a progressively increasing right-leaning bias. Furthermore, we evaluate three mitigation strategies, Overfitting, Preservation, and Accumulation, and show that bias amplification persists even when model collapse is mitigated. Finally, a mechanistic interpretation identifies distinct sets of neurons responsible for model collapse and bias amplification, suggesting they arise from different underlying mechanisms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15234v2-abstract-full').style.display = 'none'; document.getElementById('2410.15234v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13702">arXiv:2410.13702</a> <span> [<a href="https://arxiv.org/pdf/2410.13702">pdf</a>, <a href="https://arxiv.org/format/2410.13702">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Experimental composable key distribution using discrete-modulated continuous variable quantum cryptography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hajomer%2C+A+A+E">Adnan A. E. Hajomer</a>, <a href="/search/?searchtype=author&query=Kanitschar%2C+F">Florian Kanitschar</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nitin Jain</a>, <a href="/search/?searchtype=author&query=Hentschel%2C+M">Michael Hentschel</a>, <a href="/search/?searchtype=author&query=Zhang%2C+R">Runjia Zhang</a>, <a href="/search/?searchtype=author&query=L%C3%BCtkenhaus%2C+N">Norbert L眉tkenhaus</a>, <a href="/search/?searchtype=author&query=Andersen%2C+U+L">Ulrik L. 
Andersen</a>, <a href="/search/?searchtype=author&query=Pacher%2C+C">Christoph Pacher</a>, <a href="/search/?searchtype=author&query=Gehring%2C+T">Tobias Gehring</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13702v1-abstract-short" style="display: inline;"> Establishing secure data communication necessitates secure key exchange over a public channel. Quantum key distribution (QKD), which leverages the principles of quantum physics, can achieve this with information-theoretic security. The discrete modulated (DM) continuous variable (CV) QKD protocol, in particular, is a suitable candidate for large-scale deployment of quantum-safe communication due t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13702v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13702v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13702v1-abstract-full" style="display: none;"> Establishing secure data communication necessitates secure key exchange over a public channel. Quantum key distribution (QKD), which leverages the principles of quantum physics, can achieve this with information-theoretic security. The discrete modulated (DM) continuous variable (CV) QKD protocol, in particular, is a suitable candidate for large-scale deployment of quantum-safe communication due to its simplicity and compatibility with standard high-speed telecommunication technology. Here, we present the first experimental demonstration of a four-state DM CVQKD system, successfully generating composable finite-size keys, secure against collective attacks over a 20 km fiber channel with 2.3 \times 10^{9} coherent quantum states, achieving a positive composable key rate of 11.04 \times 10^{-3} bits/symbol. 
This accomplishment is enabled by using an advanced security proof, meticulously selecting its parameters, and the fast, stable operation of the system. Our results mark a significant step toward the large-scale deployment of practical, high-performance, cost-effective, and highly secure quantum key distribution networks using standard telecommunication components. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13702v1-abstract-full').style.display = 'none'; document.getElementById('2410.13702v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08938">arXiv:2410.08938</a> <span> [<a href="https://arxiv.org/pdf/2410.08938">pdf</a>, <a href="https://arxiv.org/format/2410.08938">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> KinDEL: DNA-Encoded Library Dataset for Kinase Inhibitors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+B">Benson Chen</a>, <a href="/search/?searchtype=author&query=Danel%2C+T">Tomasz Danel</a>, <a href="/search/?searchtype=author&query=McEnaney%2C+P+J">Patrick J. 
McEnaney</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nikhil Jain</a>, <a href="/search/?searchtype=author&query=Novikov%2C+K">Kirill Novikov</a>, <a href="/search/?searchtype=author&query=Akki%2C+S+U">Spurti Umesh Akki</a>, <a href="/search/?searchtype=author&query=Turnbull%2C+J+L">Joshua L. Turnbull</a>, <a href="/search/?searchtype=author&query=Pandya%2C+V+A">Virja Atul Pandya</a>, <a href="/search/?searchtype=author&query=Belotserkovskii%2C+B+P">Boris P. Belotserkovskii</a>, <a href="/search/?searchtype=author&query=Weaver%2C+J+B">Jared Bryce Weaver</a>, <a href="/search/?searchtype=author&query=Biswas%2C+A">Ankita Biswas</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+D">Dat Nguyen</a>, <a href="/search/?searchtype=author&query=Dreiman%2C+G+H+S">Gabriel H. S. Dreiman</a>, <a href="/search/?searchtype=author&query=Sultan%2C+M">Mohammad Sultan</a>, <a href="/search/?searchtype=author&query=Stanley%2C+N">Nathaniel Stanley</a>, <a href="/search/?searchtype=author&query=Whalen%2C+D+M">Daniel M Whalen</a>, <a href="/search/?searchtype=author&query=Kanichar%2C+D">Divya Kanichar</a>, <a href="/search/?searchtype=author&query=Klein%2C+C">Christoph Klein</a>, <a href="/search/?searchtype=author&query=Fox%2C+E">Emily Fox</a>, <a href="/search/?searchtype=author&query=Watts%2C+R+E">R. Edward Watts</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08938v1-abstract-short" style="display: inline;"> DNA-Encoded Libraries (DEL) are combinatorial small molecule libraries that offer an efficient way to characterize diverse chemical spaces. Selection experiments using DELs are pivotal to drug discovery efforts, enabling high-throughput screens for hit finding. However, limited availability of public DEL datasets hinders the advancement of computational techniques designed to process such data. 
To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08938v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08938v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08938v1-abstract-full" style="display: none;"> DNA-Encoded Libraries (DEL) are combinatorial small molecule libraries that offer an efficient way to characterize diverse chemical spaces. Selection experiments using DELs are pivotal to drug discovery efforts, enabling high-throughput screens for hit finding. However, limited availability of public DEL datasets hinders the advancement of computational techniques designed to process such data. To bridge this gap, we present KinDEL, one of the first large, publicly available DEL datasets on two kinases: Mitogen-Activated Protein Kinase 14 (MAPK14) and Discoidin Domain Receptor Tyrosine Kinase 1 (DDR1). Interest in this data modality is growing due to its ability to generate extensive supervised chemical data that densely samples around select molecular structures. Demonstrating one such application of the data, we benchmark different machine learning techniques to develop predictive models for hit identification; in particular, we highlight recent structure-based probabilistic approaches. Finally, we provide biophysical assay data, both on- and off-DNA, to validate our models on a smaller subset of molecules. Data and code for our benchmarks can be found at: https://github.com/insitro/kindel. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08938v1-abstract-full').style.display = 'none'; document.getElementById('2410.08938v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07960">arXiv:2410.07960</a> <span> [<a href="https://arxiv.org/pdf/2410.07960">pdf</a>, <a href="https://arxiv.org/ps/2410.07960">ps</a>, <a href="https://arxiv.org/format/2410.07960">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Physics">math-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Representation Theory">math.RT</span> </div> </div> <p class="title is-5 mathjax"> Kirillov's conjecture on Hecke-Grothendieck polynomials </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Brubaker%2C+B">Ben Brubaker</a>, <a href="/search/?searchtype=author&query=Dasher%2C+A+S">A. Suki Dasher</a>, <a href="/search/?searchtype=author&query=Hu%2C+M">Michael Hu</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nupur Jain</a>, <a href="/search/?searchtype=author&query=Li%2C+Y">Yifan Li</a>, <a href="/search/?searchtype=author&query=Lin%2C+Y">Yi Lin</a>, <a href="/search/?searchtype=author&query=Mihaila%2C+M">Maria Mihaila</a>, <a href="/search/?searchtype=author&query=Tran%2C+V">Van Tran</a>, <a href="/search/?searchtype=author&query=%C3%9Cnel%2C+I+D">I. 
Deniz Ünel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07960v1-abstract-short" style="display: inline;"> We use algebraic methods in statistical mechanics to represent a multi-parameter class of polynomials in severable variables as partition functions of a new family of solvable lattice models. The class of polynomials, defined by A.N. Kirillov, is derived from the largest class of divided difference operators satisfying the braid relations of Cartan type $A$. It includes as specializations Schubert… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07960v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07960v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07960v1-abstract-full" style="display: none;"> We use algebraic methods in statistical mechanics to represent a multi-parameter class of polynomials in severable variables as partition functions of a new family of solvable lattice models. The class of polynomials, defined by A.N. Kirillov, is derived from the largest class of divided difference operators satisfying the braid relations of Cartan type $A$. It includes as specializations Schubert, Grothendieck, and dual-Grothendieck polynomials among others. In particular, our results prove positivity conjectures of Kirillov for the subfamily of Hecke--Grothendieck polynomials, while the larger family is shown to exhibit rare instances of negative coefficients. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07960v1-abstract-full').style.display = 'none'; document.getElementById('2410.07960v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04420">arXiv:2410.04420</a> <span> [<a href="https://arxiv.org/pdf/2410.04420">pdf</a>, <a href="https://arxiv.org/ps/2410.04420">ps</a>, <a href="https://arxiv.org/format/2410.04420">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> </div> </div> <p class="title is-5 mathjax"> Non-deterministic asynchronous automata games and their undecidability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Adsul%2C+B">Bharat Adsul</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nehul Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04420v1-abstract-short" style="display: inline;"> We propose a new model of a distributed game, called an ATS game, which is played on a non-deterministic asynchronous transition system -- a natural distributed finite-state device working on Mazurkiewicz traces. This new partial-information game is played between an environment and a distributed system comprising multiple processes. 
A distributed strategy uses causal past to make the next move.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04420v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04420v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04420v1-abstract-full" style="display: none;"> We propose a new model of a distributed game, called an ATS game, which is played on a non-deterministic asynchronous transition system -- a natural distributed finite-state device working on Mazurkiewicz traces. This new partial-information game is played between an environment and a distributed system comprising multiple processes. A distributed strategy uses causal past to make the next move. The key algorithmic question is to solve the game, that is, to decide the existence of a distributed winning strategy. It turns out ATS games are equivalent to asynchronous games, which are known to be undecidable. We prove that ATS games are undecidable in this article. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04420v1-abstract-full').style.display = 'none'; document.getElementById('2410.04420v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">1 lemma 10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03750">arXiv:2410.03750</a> <span> [<a href="https://arxiv.org/pdf/2410.03750">pdf</a>, <a href="https://arxiv.org/format/2410.03750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SQFT: Low-cost Model Adaptation in Low-precision Sparse Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Mu%C3%B1oz%2C+J+P">Juan Pablo Muñoz</a>, <a href="/search/?searchtype=author&query=Yuan%2C+J">Jinjie Yuan</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nilesh Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03750v1-abstract-short" style="display: inline;"> Large pre-trained models (LPMs), such as large language models, have become ubiquitous and are employed in many applications. These models are often adapted to a desired domain or downstream task through a fine-tuning stage. 
This paper proposes SQFT, an end-to-end solution for low-precision sparse parameter-efficient fine-tuning of LPMs, allowing for effective model manipulation in resource-constr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03750v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03750v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03750v1-abstract-full" style="display: none;"> Large pre-trained models (LPMs), such as large language models, have become ubiquitous and are employed in many applications. These models are often adapted to a desired domain or downstream task through a fine-tuning stage. This paper proposes SQFT, an end-to-end solution for low-precision sparse parameter-efficient fine-tuning of LPMs, allowing for effective model manipulation in resource-constrained environments. Additionally, an innovative strategy enables the merging of sparse weights with low-rank adapters without losing sparsity and accuracy, overcoming the limitations of previous approaches. SQFT also addresses the challenge of having quantized weights and adapters with different numerical precisions, enabling merging in the desired numerical format without sacrificing accuracy. Multiple adaptation scenarios, models, and comprehensive sparsity levels demonstrate the effectiveness of SQFT. Models and code are available at https://github.com/IntelLabs/Hardware-Aware-Automated-Machine-Learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03750v1-abstract-full').style.display = 'none'; document.getElementById('2410.03750v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in EMNLP-24 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01103">arXiv:2410.01103</a> <span> [<a href="https://arxiv.org/pdf/2410.01103">pdf</a>, <a href="https://arxiv.org/format/2410.01103">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Approximately Aligned Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Melcer%2C+D">Daniel Melcer</a>, <a href="/search/?searchtype=author&query=Gonugondla%2C+S">Sujan Gonugondla</a>, <a href="/search/?searchtype=author&query=Perera%2C+P">Pramuditha Perera</a>, <a href="/search/?searchtype=author&query=Qian%2C+H">Haifeng Qian</a>, <a href="/search/?searchtype=author&query=Chiang%2C+W">Wen-Hao Chiang</a>, <a href="/search/?searchtype=author&query=Wang%2C+Y">Yanjun Wang</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nihal Jain</a>, <a href="/search/?searchtype=author&query=Garg%2C+P">Pranav Garg</a>, <a href="/search/?searchtype=author&query=Ma%2C+X">Xiaofei Ma</a>, <a href="/search/?searchtype=author&query=Deoras%2C+A">Anoop Deoras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01103v1-abstract-short" style="display: inline;"> It is common to reject undesired outputs of Large Language Models (LLMs); however, current methods to do so require an excessive amount of computation, or 
severely distort the distribution of outputs. We present a method to balance the distortion of the output distribution with computational efficiency, allowing for the generation of long sequences of text with difficult-to-satisfy constraints, wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01103v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01103v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01103v1-abstract-full" style="display: none;"> It is common to reject undesired outputs of Large Language Models (LLMs); however, current methods to do so require an excessive amount of computation, or severely distort the distribution of outputs. We present a method to balance the distortion of the output distribution with computational efficiency, allowing for the generation of long sequences of text with difficult-to-satisfy constraints, with less amplification of low probability outputs compared to existing methods. We show through a series of experiments that the task-specific performance of our method is comparable to methods that do not distort the output distribution, while being much more computationally efficient. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01103v1-abstract-full').style.display = 'none'; document.getElementById('2410.01103v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages main, 22 pages total</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15400">arXiv:2409.15400</a> <span> [<a href="https://arxiv.org/pdf/2409.15400">pdf</a>, <a href="https://arxiv.org/format/2409.15400">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> </div> </div> <p class="title is-5 mathjax"> Parallel Graph Drawing Algorithm for Bipartite Planar Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15400v1-abstract-short" style="display: inline;"> We give a parallel $O(\log(n))$-time algorithm on a CRCW PRAM to assign vertical and horizontal segments to the vertices of any planar bipartite graph $G$ in the following manner: i) Two segments cannot share an interior point ii) Two segments intersect if and only if the corresponding vertices are adjacent, which uses a polynomial number of processors. 
In other words, represent vertices of a plan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15400v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15400v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15400v1-abstract-full" style="display: none;"> We give a parallel $O(\log(n))$-time algorithm on a CRCW PRAM to assign vertical and horizontal segments to the vertices of any planar bipartite graph $G$ in the following manner: i) Two segments cannot share an interior point ii) Two segments intersect if and only if the corresponding vertices are adjacent, which uses a polynomial number of processors. In other words, represent vertices of a planar bipartite graph as parallel segments, and edges as intersection points between these segments. Note that two segments are not allowed to cross. Our method is based on a parallel algorithm for st-numbering which uses an ear decomposition search. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15400v1-abstract-full').style.display = 'none'; document.getElementById('2409.15400v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10245">arXiv:2409.10245</a> <span> [<a href="https://arxiv.org/pdf/2409.10245">pdf</a>, <a href="https://arxiv.org/format/2409.10245">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> From Text to Emoji: How PEFT-Driven Personality Manipulation Unleashes the Emoji Potential in LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Navya Jain</a>, <a href="/search/?searchtype=author&query=Wu%2C+Z">Zekun Wu</a>, <a href="/search/?searchtype=author&query=Munoz%2C+C">Cristian Munoz</a>, <a href="/search/?searchtype=author&query=Hilliard%2C+A">Airlie Hilliard</a>, <a href="/search/?searchtype=author&query=Guan%2C+X">Xin Guan</a>, <a href="/search/?searchtype=author&query=Koshiyama%2C+A">Adriano Koshiyama</a>, <a href="/search/?searchtype=author&query=Kazim%2C+E">Emre Kazim</a>, <a href="/search/?searchtype=author&query=Treleaven%2C+P">Philip Treleaven</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10245v4-abstract-short" style="display: inline;"> The manipulation of the personality traits of large language models (LLMs) has emerged as a key area of research. 
Methods like prompt-based In-Context Knowledge Editing (IKE) and gradient-based Model Editor Networks (MEND) have been explored but show irregularity and variability; IKE depends on the prompt, leading to variability and sensitivity, while MEND yields inconsistent and gibberish outputs… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10245v4-abstract-full').style.display = 'inline'; document.getElementById('2409.10245v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10245v4-abstract-full" style="display: none;"> The manipulation of the personality traits of large language models (LLMs) has emerged as a key area of research. Methods like prompt-based In-Context Knowledge Editing (IKE) and gradient-based Model Editor Networks (MEND) have been explored but show irregularity and variability; IKE depends on the prompt, leading to variability and sensitivity, while MEND yields inconsistent and gibberish outputs. To address this, we employed Opinion QA Based Parameter-Efficient Fine-Tuning (PEFT), specifically Quantized Low-Rank Adaptation (QLoRA), to manipulate the Big Five personality traits: Openness, Conscientiousness, Extraversion, Agreeableness, and Neuroticism. After PEFT, models such as Mistral-7B-Instruct and LLaMA-2-7B-chat showed a latent behaviour by generating emojis for certain traits, despite no emojis being present in the PEFT data. For instance, LLaMA-2-7B-chat generated emojis in 99.5\% of extraversion-related test instances, while Mistral-7B-Instruct did so in 92.5\% of openness-related test instances. ICL Explainability analysis indicated that the LLMs used emojis intentionally to express these traits. Mechanistic Interpretability analysis showed that this latent behaviour of LLMs could be traced to specific neurons that became activated or amplified after PEFT. This paper provides a number of novel contributions. 
First, introducing an Opinion QA dataset for PEFT-driven personality manipulation; second, developing metric models to benchmark LLM personality traits; third, demonstrating PEFT's superiority over IKE in personality manipulation; and finally, analysing and validating emoji usage through explainability methods such as Mechanistic Interpretability and In-context learning Explainability methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10245v4-abstract-full').style.display = 'none'; document.getElementById('2409.10245v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings paper of NAACL 2025 and NeurIPS 2024 Workshop on Behavioral Machine Learning</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Findings paper of NAACL 2025 and NeurIPS 2024 Workshop on Behavioral Machine Learning </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05663">arXiv:2409.05663</a> <span> [<a href="https://arxiv.org/pdf/2409.05663">pdf</a>, <a href="https://arxiv.org/format/2409.05663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Gases">cond-mat.quant-gas</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> <div class="is-inline-block" 
style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PRXQuantum.6.010318">10.1103/PRXQuantum.6.010318 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Simulating Chemistry with Fermionic Optical Superlattices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gkritsis%2C+F">Fotios Gkritsis</a>, <a href="/search/?searchtype=author&query=Dux%2C+D">Daniel Dux</a>, <a href="/search/?searchtype=author&query=Zhang%2C+J">Jin Zhang</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Gogolin%2C+C">Christian Gogolin</a>, <a href="/search/?searchtype=author&query=Preiss%2C+P+M">Philipp M. Preiss</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05663v2-abstract-short" style="display: inline;"> We show that quantum number preserving Ansätze for variational optimization in quantum chemistry find an elegant mapping to ultracold fermions in optical superlattices. Using native Hubbard dynamics, trial ground states of molecular Hamiltonians can be prepared and their molecular energies measured in the lattice. 
The scheme requires local control over interactions and chemical potentials and glob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05663v2-abstract-full').style.display = 'inline'; document.getElementById('2409.05663v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05663v2-abstract-full" style="display: none;"> We show that quantum number preserving Ansätze for variational optimization in quantum chemistry find an elegant mapping to ultracold fermions in optical superlattices. Using native Hubbard dynamics, trial ground states of molecular Hamiltonians can be prepared and their molecular energies measured in the lattice. The scheme requires local control over interactions and chemical potentials and global control over tunneling dynamics, but foregoes the need for optical tweezers, shuttling operations, or long-range interactions. We describe a complete compilation pipeline from the molecular Hamiltonian to the sequence of lattice operations, thus providing a concrete link between quantum simulation and chemistry. Our work enables the application of recent quantum algorithmic techniques, such as Double Factorization and quantum Tailored Coupled Cluster, to present-day fermionic optical lattice systems with significant improvements in the required number of experimental repetitions. We provide detailed quantum resource estimates for small non-trivial hardware experiments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05663v2-abstract-full').style.display = 'none'; document.getElementById('2409.05663v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> PRX Quantum 6, 010318 (2025) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.17291">arXiv:2408.17291</a> <span> [<a href="https://arxiv.org/pdf/2408.17291">pdf</a>, <a href="https://arxiv.org/format/2408.17291">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Nuclear Theory">nucl-th</span> </div> </div> <p class="title is-5 mathjax"> Exploring the effect of positive Q-value neutron transfer in coupled-channels calculations using microscopic nuclear potentials </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">N. Jain</a>, <a href="/search/?searchtype=author&query=Bhuyan%2C+M">M. Bhuyan</a>, <a href="/search/?searchtype=author&query=Mohr%2C+P">P. 
Mohr</a>, <a href="/search/?searchtype=author&query=Kumar%2C+R">Raj Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.17291v1-abstract-short" style="display: inline;"> We investigated the effect of the degree of freedom of neutron transfer on the cross section of heavy-ion fusion reactions, using the relativistic mean-field formalism within the coupled channel approach (CCFULL). We obtain the microscopic nuclear interaction potential in terms of the density distributions for the targets and projectiles with the NL3$^*$ parameter set and corresponding R3Y nucleon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.17291v1-abstract-full').style.display = 'inline'; document.getElementById('2408.17291v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.17291v1-abstract-full" style="display: none;"> We investigated the effect of the degree of freedom of neutron transfer on the cross section of heavy-ion fusion reactions, using the relativistic mean-field formalism within the coupled channel approach (CCFULL). We obtain the microscopic nuclear interaction potential in terms of the density distributions for the targets and projectiles with the NL3$^*$ parameter set and corresponding R3Y nucleon-nucleon potential. The present analysis includes the $^{18}$O-induced reactions, for which experimental fusion cross-section is available around the Coulomb barrier. It is evident from the results that including vibrational and/or rotational degrees of freedom enhances the fusion cross-section at energies below the barrier. However, fusion hindrance persists in this energy region. To address this, we incorporated the two-neutron $(2n)$ transfer channels in the Coupled Channel calculation. 
A comparison with the Woods-Saxon potential (WS) shows that the R3Y nucleon-nucleon (NN) potential, with intrinsic degrees of freedom, is superior to it, especially at energies below the barrier. This superiority can be attributed to the observed higher barrier heights and lower cross-section of the WS potential compared to the relativistic R3Y NN potential for the considered reaction systems. Consequently, we employed the relativistic mean-field formalism to estimate fusion characteristics for the unknown $^{18}$O-induced reactions, namely $^{18}$O + $^{62}${Ni}, $^{18}$O + $^{70,72,76}${Ge}, $^{18}$O + $^{144,150}${Nd}, and $^{18}$O + $^{144,148,152,154}${Sm}. Our analysis highlights the significant role of positive $Q$-value neutron transfer in enhancing the sub-barrier fusion cross-section for the $^{18}$O + $^{148}${Nd} reaction with the R3Y NN potential. However, the effect of this transfer channel for the other considered reactions is comparatively less pronounced. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.17291v1-abstract-full').style.display = 'none'; document.getElementById('2408.17291v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15256">arXiv:2408.15256</a> <span> [<a href="https://arxiv.org/pdf/2408.15256">pdf</a>, <a href="https://arxiv.org/format/2408.15256">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Improving Ontology Requirements Engineering with OntoChat and Participatory Prompting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhao%2C+Y">Yihang Zhao</a>, <a href="/search/?searchtype=author&query=Zhang%2C+B">Bohui Zhang</a>, <a href="/search/?searchtype=author&query=Hu%2C+X">Xi Hu</a>, <a href="/search/?searchtype=author&query=Ouyang%2C+S">Shuyin Ouyang</a>, <a href="/search/?searchtype=author&query=Kim%2C+J">Jongmo Kim</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nitisha Jain</a>, <a href="/search/?searchtype=author&query=de+Berardinis%2C+J">Jacopo de Berardinis</a>, <a href="/search/?searchtype=author&query=Mero%C3%B1o-Pe%C3%B1uela%2C+A">Albert Meroño-Peñuela</a>, <a href="/search/?searchtype=author&query=Simperl%2C+E">Elena Simperl</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15256v3-abstract-short" style="display: inline;"> Past ontology requirements engineering (ORE) has primarily relied on manual methods, such as interviews and collaborative forums, to gather user requirements from domain experts, especially in large projects. 
Current OntoChat offers a framework for ORE that utilises large language models (LLMs) to streamline the process through four key functions: user story creation, competency question (CQ) extr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15256v3-abstract-full').style.display = 'inline'; document.getElementById('2408.15256v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15256v3-abstract-full" style="display: none;"> Past ontology requirements engineering (ORE) has primarily relied on manual methods, such as interviews and collaborative forums, to gather user requirements from domain experts, especially in large projects. Current OntoChat offers a framework for ORE that utilises large language models (LLMs) to streamline the process through four key functions: user story creation, competency question (CQ) extraction, CQ filtration and analysis, and ontology testing support. In OntoChat, users are expected to prompt the chatbot to generate user stories. However, preliminary evaluations revealed that they struggle to do this effectively. To address this issue, we experimented with a research method called participatory prompting, which involves researcher-mediated interactions to help users without deep knowledge of LLMs use the chatbot more effectively. This participatory prompting user study produces pre-defined prompt templates based on user queries, focusing on creating and refining personas, goals, scenarios, sample data, and data resources for user stories. These refined user stories will subsequently be converted into CQs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15256v3-abstract-full').style.display = 'none'; document.getElementById('2408.15256v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20978">arXiv:2407.20978</a> <span> [<a href="https://arxiv.org/pdf/2407.20978">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> Are gene-by-environment interactions leveraged in multi-modality neural networks for breast cancer prediction? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Isgut%2C+M">Monica Isgut</a>, <a href="/search/?searchtype=author&query=Hornback%2C+A">Andrew Hornback</a>, <a href="/search/?searchtype=author&query=Luo%2C+Y">Yunan Luo</a>, <a href="/search/?searchtype=author&query=Khimani%2C+A">Asma Khimani</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neha Jain</a>, <a href="/search/?searchtype=author&query=Wang%2C+M+D">May D. Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20978v1-abstract-short" style="display: inline;"> Polygenic risk scores (PRSs) can significantly enhance breast cancer risk prediction when combined with clinical risk factor data. 
While many studies have explored the value-add of PRSs, little is known about the potential impact of gene-by-gene or gene-by-environment interactions towards enhancing the risk discrimination capabilities of multi-modal models combining PRSs with clinical data. In thi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20978v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20978v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20978v1-abstract-full" style="display: none;"> Polygenic risk scores (PRSs) can significantly enhance breast cancer risk prediction when combined with clinical risk factor data. While many studies have explored the value-add of PRSs, little is known about the potential impact of gene-by-gene or gene-by-environment interactions towards enhancing the risk discrimination capabilities of multi-modal models combining PRSs with clinical data. In this study, we integrated data on 318 individual genotype variants along with clinical data in a neural network to explore whether gene-by-gene (i.e., between individual variants) and/or gene-by-environment (between clinical risk factors and variants) interactions could be leveraged jointly during training to improve breast cancer risk prediction performance. We benchmarked our approach against a baseline model combining traditional univariate PRSs with clinical data in a logistic regression model and ran an interpretability analysis to identify feature interactions. While our model did not demonstrate improved performance over the baseline, we discovered 248 (<1%) statistically significant gene-by-gene and gene-by-environment interactions out of the ~53.6k possible feature pairs, the most contributory of which included rs6001930 (MKL1) and rs889312 (MAP3K1), with age and menopause being the most heavily interacting non-genetic risk factors. 
We also modeled the significant interactions as a network of highly connected features, suggesting that potential higher-order interactions are captured by the model. Although gene-by-environment (or gene-by-gene) interactions did not enhance breast cancer risk prediction performance in neural networks, our study provides evidence that these interactions can be leveraged by these models to inform their predictions. This study represents the first application of neural networks to screen for interactions impacting breast cancer risk using real-world data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20978v1-abstract-full').style.display = 'none'; document.getElementById('2407.20978v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18278">arXiv:2407.18278</a> <span> [<a href="https://arxiv.org/pdf/2407.18278">pdf</a>, <a href="https://arxiv.org/format/2407.18278">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Talking Wikidata: Communication patterns and their impact on community engagement in collaborative knowledge graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Koutsiana%2C+E">Elisavet Koutsiana</a>, <a href="/search/?searchtype=author&query=Reklos%2C+I">Ioannis Reklos</a>, <a href="/search/?searchtype=author&query=Alghamdi%2C+K+S">Kholoud Saad Alghamdi</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nitisha Jain</a>, <a href="/search/?searchtype=author&query=Mero%C3%B1o-Pe%C3%B1uela%2C+A">Albert Meroño-Peñuela</a>, <a href="/search/?searchtype=author&query=Simperl%2C+E">Elena Simperl</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18278v2-abstract-short" style="display: inline;"> We study collaboration patterns of Wikidata, one of the world's largest open source collaborative knowledge graph (KG) communities. Collaborative KG communities, play a key role in structuring machine-readable knowledge to support AI systems like conversational agents. 
However, these communities face challenges related to long-term member engagement, as a small subset of contributors often is resp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18278v2-abstract-full').style.display = 'inline'; document.getElementById('2407.18278v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18278v2-abstract-full" style="display: none;"> We study collaboration patterns of Wikidata, one of the world's largest open source collaborative knowledge graph (KG) communities. Collaborative KG communities, play a key role in structuring machine-readable knowledge to support AI systems like conversational agents. However, these communities face challenges related to long-term member engagement, as a small subset of contributors often is responsible for the majority of contributions and decision-making. While prior research has explored contributors' roles and lifespans, discussions within collaborative KG communities remain understudied. To fill this gap, we investigated the behavioural patterns of contributors and factors affecting their communication and participation. We analysed all the discussions on Wikidata using a mixed methods approach, including statistical tests, network analysis, and text and graph embedding representations. Our findings reveal that the interactions between Wikidata editors form a small world network, resilient to dropouts and inclusive, where both the network topology and discussion content influence the continuity of conversations. Furthermore, the account age of Wikidata members and their conversations are significant factors in their long-term engagement with the project. Our observations and recommendations can benefit the Wikidata and semantic web communities, providing guidance on how to improve collaborative environments for sustainability, growth, and quality. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18278v2-abstract-full').style.display = 'none'; document.getElementById('2407.18278v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted at TGDK</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16883">arXiv:2407.16883</a> <span> [<a href="https://arxiv.org/pdf/2407.16883">pdf</a>, <a href="https://arxiv.org/format/2407.16883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Standardized Machine-readable Dataset Documentation Format for Responsible AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Nitisha Jain</a>, <a href="/search/?searchtype=author&query=Akhtar%2C+M">Mubashara Akhtar</a>, <a href="/search/?searchtype=author&query=Giner-Miguelez%2C+J">Joan 
Giner-Miguelez</a>, <a href="/search/?searchtype=author&query=Shinde%2C+R">Rajat Shinde</a>, <a href="/search/?searchtype=author&query=Vanschoren%2C+J">Joaquin Vanschoren</a>, <a href="/search/?searchtype=author&query=Vogler%2C+S">Steffen Vogler</a>, <a href="/search/?searchtype=author&query=Goswami%2C+S">Sujata Goswami</a>, <a href="/search/?searchtype=author&query=Rao%2C+Y">Yuhan Rao</a>, <a href="/search/?searchtype=author&query=Santos%2C+T">Tim Santos</a>, <a href="/search/?searchtype=author&query=Oala%2C+L">Luis Oala</a>, <a href="/search/?searchtype=author&query=Karamousadakis%2C+M">Michalis Karamousadakis</a>, <a href="/search/?searchtype=author&query=Maskey%2C+M">Manil Maskey</a>, <a href="/search/?searchtype=author&query=Marcenac%2C+P">Pierre Marcenac</a>, <a href="/search/?searchtype=author&query=Conforti%2C+C">Costanza Conforti</a>, <a href="/search/?searchtype=author&query=Kuchnik%2C+M">Michael Kuchnik</a>, <a href="/search/?searchtype=author&query=Aroyo%2C+L">Lora Aroyo</a>, <a href="/search/?searchtype=author&query=Benjelloun%2C+O">Omar Benjelloun</a>, <a href="/search/?searchtype=author&query=Simperl%2C+E">Elena Simperl</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16883v1-abstract-short" style="display: inline;"> Data is critical to advancing AI technologies, yet its quality and documentation remain significant challenges, leading to adverse downstream effects (e.g., potential biases) in AI applications. This paper addresses these issues by introducing Croissant-RAI, a machine-readable metadata format designed to enhance the discoverability, interoperability, and trustworthiness of AI datasets. 
Croissant-R… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16883v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16883v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16883v1-abstract-full" style="display: none;"> Data is critical to advancing AI technologies, yet its quality and documentation remain significant challenges, leading to adverse downstream effects (e.g., potential biases) in AI applications. This paper addresses these issues by introducing Croissant-RAI, a machine-readable metadata format designed to enhance the discoverability, interoperability, and trustworthiness of AI datasets. Croissant-RAI extends the Croissant metadata format and builds upon existing responsible AI (RAI) documentation frameworks, offering a standardized set of attributes and practices to facilitate community-wide adoption. Leveraging established web-publishing practices, such as Schema.org, Croissant-RAI enables dataset users to easily find and utilize RAI metadata regardless of the platform on which the datasets are published. Furthermore, it is seamlessly integrated into major data search engines, repositories, and machine learning frameworks, streamlining the reading and writing of responsible AI metadata within practitioners' existing workflows. Croissant-RAI was developed through a community-led effort. It has been designed to be adaptable to evolving documentation requirements and is supported by a Python library and a visual editor. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16883v1-abstract-full').style.display = 'none'; document.getElementById('2407.16883v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09726">arXiv:2407.09726</a> <span> [<a href="https://arxiv.org/pdf/2407.09726">pdf</a>, <a href="https://arxiv.org/format/2407.09726">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On Mitigating Code LLM Hallucinations with API Documentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Nihal Jain</a>, <a href="/search/?searchtype=author&query=Kwiatkowski%2C+R">Robert Kwiatkowski</a>, <a href="/search/?searchtype=author&query=Ray%2C+B">Baishakhi Ray</a>, <a href="/search/?searchtype=author&query=Ramanathan%2C+M+K">Murali Krishna Ramanathan</a>, <a href="/search/?searchtype=author&query=Kumar%2C+V">Varun Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2407.09726v1-abstract-short" style="display: inline;"> In this study, we address the issue of API hallucinations in various software engineering contexts. We introduce CloudAPIBench, a new benchmark designed to measure API hallucination occurrences. CloudAPIBench also provides annotations for frequencies of API occurrences in the public domain, allowing us to study API hallucinations at various frequency levels. Our findings reveal that Code LLMs stru… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09726v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09726v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09726v1-abstract-full" style="display: none;"> In this study, we address the issue of API hallucinations in various software engineering contexts. We introduce CloudAPIBench, a new benchmark designed to measure API hallucination occurrences. CloudAPIBench also provides annotations for frequencies of API occurrences in the public domain, allowing us to study API hallucinations at various frequency levels. Our findings reveal that Code LLMs struggle with low frequency APIs: for e.g., GPT-4o achieves only 38.58% valid low frequency API invocations. We demonstrate that Documentation Augmented Generation (DAG) significantly improves performance for low frequency APIs (increase to 47.94% with DAG) but negatively impacts high frequency APIs when using sub-optimal retrievers (a 39.02% absolute drop). To mitigate this, we propose to intelligently trigger DAG where we check against an API index or leverage Code LLMs' confidence scores to retrieve only when needed. We demonstrate that our proposed methods enhance the balance between low and high frequency API performance, resulting in more reliable API invocations (8.20% absolute improvement on CloudAPIBench for GPT-4o). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09726v1-abstract-full').style.display = 'none'; document.getElementById('2407.09726v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08193">arXiv:2407.08193</a> <span> [<a href="https://arxiv.org/pdf/2407.08193">pdf</a>, <a href="https://arxiv.org/ps/2407.08193">ps</a>, <a href="https://arxiv.org/format/2407.08193">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Commutative Algebra">math.AC</span> </div> </div> <p class="title is-5 mathjax"> On constacyclic codes over a class of non-chain rings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jain%2C+N">Nikita Jain</a>, <a href="/search/?searchtype=author&query=Dutt%2C+S">Sucheta Dutt</a>, <a href="/search/?searchtype=author&query=Sehmi%2C+R">Ranjeet Sehmi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08193v1-abstract-short" style="display: inline;"> In this paper, a unique form of generators of a constacyclic code of arbitrary length over a non-chain ring of the type $\mathtt{R_{_θ}}=Z_{4}+νZ_{4}, ν^{2}=θ\in Z_{4}+νZ_{4}$ has been obtained. Further, rank and cardinality of a constacyclic code of arbitrary length over a non-chain ring of the type $\mathtt{R_{_θ}}$ have been obtained by determining a minimal spanning set of the code. 
Also, nece… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08193v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08193v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08193v1-abstract-full" style="display: none;"> In this paper, a unique form of generators of a constacyclic code of arbitrary length over a non-chain ring of the type $\mathtt{R_{_θ}}=Z_{4}+νZ_{4}, ν^{2}=θ\in Z_{4}+νZ_{4}$ has been obtained. Further, rank and cardinality of a constacyclic code of arbitrary length over a non-chain ring of the type $\mathtt{R_{_θ}}$ have been obtained by determining a minimal spanning set of the code. Also, necessary and sufficient conditions for a constacyclic code of arbitrary length over a non-chain ring of the type $\mathtt{R_{_θ}}$ to be reversible have been determined. Examples have also been presented in support of our results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08193v1-abstract-full').style.display = 'none'; document.getElementById('2407.08193v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19314">arXiv:2406.19314</a> <span> [<a href="https://arxiv.org/pdf/2406.19314">pdf</a>, <a href="https://arxiv.org/format/2406.19314">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LiveBench: A Challenging, Contamination-Free LLM Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=White%2C+C">Colin White</a>, <a href="/search/?searchtype=author&query=Dooley%2C+S">Samuel Dooley</a>, <a href="/search/?searchtype=author&query=Roberts%2C+M">Manley Roberts</a>, <a href="/search/?searchtype=author&query=Pal%2C+A">Arka Pal</a>, <a href="/search/?searchtype=author&query=Feuer%2C+B">Ben Feuer</a>, <a href="/search/?searchtype=author&query=Jain%2C+S">Siddhartha Jain</a>, <a href="/search/?searchtype=author&query=Shwartz-Ziv%2C+R">Ravid Shwartz-Ziv</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Saifullah%2C+K">Khalid Saifullah</a>, <a href="/search/?searchtype=author&query=Naidu%2C+S">Siddartha Naidu</a>, <a href="/search/?searchtype=author&query=Hegde%2C+C">Chinmay Hegde</a>, <a href="/search/?searchtype=author&query=LeCun%2C+Y">Yann LeCun</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a>, <a 
href="/search/?searchtype=author&query=Neiswanger%2C+W">Willie Neiswanger</a>, <a href="/search/?searchtype=author&query=Goldblum%2C+M">Micah Goldblum</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19314v1-abstract-short" style="display: inline;"> Test set contamination, wherein test data from a benchmark ends up in a newer model's training set, is a well-documented obstacle for fair LLM evaluation and can quickly render benchmarks obsolete. To mitigate this, many recent benchmarks crowdsource new prompts and evaluations from human or LLM judges; however, these can introduce significant biases, and break down when scoring hard questions. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19314v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19314v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19314v1-abstract-full" style="display: none;"> Test set contamination, wherein test data from a benchmark ends up in a newer model's training set, is a well-documented obstacle for fair LLM evaluation and can quickly render benchmarks obsolete. To mitigate this, many recent benchmarks crowdsource new prompts and evaluations from human or LLM judges; however, these can introduce significant biases, and break down when scoring hard questions. In this work, we introduce a new benchmark for LLMs designed to be immune to both test set contamination and the pitfalls of LLM judging and human crowdsourcing. 
We release LiveBench, the first benchmark that (1) contains frequently-updated questions from recent information sources, (2) scores answers automatically according to objective ground-truth values, and (3) contains a wide variety of challenging tasks, spanning math, coding, reasoning, language, instruction following, and data analysis. To achieve this, LiveBench contains questions that are based on recently-released math competitions, arXiv papers, news articles, and datasets, and it contains harder, contamination-free versions of tasks from previous benchmarks such as Big-Bench Hard, AMPS, and IFEval. We evaluate many prominent closed-source models, as well as dozens of open-source models ranging from 0.5B to 110B in size. LiveBench is difficult, with top models achieving below 65% accuracy. We release all questions, code, and model answers. Questions will be added and updated on a monthly basis, and we will release new tasks and harder versions of tasks over time so that LiveBench can distinguish between the capabilities of LLMs as they improve in the future. We welcome community engagement and collaboration for expanding the benchmark tasks and models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19314v1-abstract-full').style.display = 'none'; document.getElementById('2406.19314v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15877">arXiv:2406.15877</a> <span> [<a href="https://arxiv.org/pdf/2406.15877">pdf</a>, <a href="https://arxiv.org/format/2406.15877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BigCodeBench: Benchmarking Code Generation with Diverse Function Calls and Complex Instructions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhuo%2C+T+Y">Terry Yue Zhuo</a>, <a href="/search/?searchtype=author&query=Vu%2C+M+C">Minh Chien Vu</a>, <a href="/search/?searchtype=author&query=Chim%2C+J">Jenny Chim</a>, <a href="/search/?searchtype=author&query=Hu%2C+H">Han Hu</a>, <a href="/search/?searchtype=author&query=Yu%2C+W">Wenhao Yu</a>, <a href="/search/?searchtype=author&query=Widyasari%2C+R">Ratnadira Widyasari</a>, <a href="/search/?searchtype=author&query=Yusuf%2C+I+N+B">Imam Nur Bani Yusuf</a>, <a href="/search/?searchtype=author&query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/?searchtype=author&query=He%2C+J">Junda He</a>, <a href="/search/?searchtype=author&query=Paul%2C+I">Indraneil Paul</a>, <a href="/search/?searchtype=author&query=Brunner%2C+S">Simon Brunner</a>, <a href="/search/?searchtype=author&query=Gong%2C+C">Chen Gong</a>, <a href="/search/?searchtype=author&query=Hoang%2C+T">Thong Hoang</a>, <a href="/search/?searchtype=author&query=Zebaze%2C+A+R">Armel Randy Zebaze</a>, <a href="/search/?searchtype=author&query=Hong%2C+X">Xiaoheng Hong</a>, <a 
href="/search/?searchtype=author&query=Li%2C+W">Wen-Ding Li</a>, <a href="/search/?searchtype=author&query=Kaddour%2C+J">Jean Kaddour</a>, <a href="/search/?searchtype=author&query=Xu%2C+M">Ming Xu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zhihan Zhang</a>, <a href="/search/?searchtype=author&query=Yadav%2C+P">Prateek Yadav</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/?searchtype=author&query=Gu%2C+A">Alex Gu</a>, <a href="/search/?searchtype=author&query=Cheng%2C+Z">Zhoujun Cheng</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/?searchtype=author&query=Liu%2C+Q">Qian Liu</a> , et al. (8 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15877v3-abstract-short" style="display: inline;"> Task automation has been greatly empowered by the recent advances in Large Language Models (LLMs) via Python code, where the tasks ranging from software engineering development to general-purpose reasoning. While current benchmarks have shown that LLMs can solve tasks using programs like human developers, the majority of their evaluations are limited to short and self-contained algorithmic tasks o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15877v3-abstract-full').style.display = 'inline'; document.getElementById('2406.15877v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15877v3-abstract-full" style="display: none;"> Task automation has been greatly empowered by the recent advances in Large Language Models (LLMs) via Python code, where the tasks ranging from software engineering development to general-purpose reasoning. 
While current benchmarks have shown that LLMs can solve tasks using programs like human developers, the majority of their evaluations are limited to short and self-contained algorithmic tasks or standalone function calls. Solving challenging and practical requires the capability of utilizing diverse function calls as tools to efficiently implement functionalities like data analysis and web development. In addition, using multiple tools to solve a task needs compositional reasoning by accurately understanding complex instructions. Fulfilling both of these characteristics can pose a great challenge for LLMs.To assess how well LLMs can solve challenging and practical tasks via programs, we introduce BigCodeBench, a benchmark that challenges LLMs to invoke multiple function calls as tools from 139 libraries and 7 domains for 1,140 fine-grained tasks. To evaluate LLMs rigorously, each task encompasses 5.6 test cases with an average branch coverage of 99%. In addition, we propose a natural-language-oriented variant of BigCodeBench, BigCodeBench-Instruct, that automatically transforms the original docstrings into short instructions only with essential information. Our extensive evaluation of 60 LLMs shows that LLMs are not yet capable of following complex instructions to use function calls precisely, with scores up to 60%, significantly lower than the human performance of 97%. The results underscore the need for further advancements in this area. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15877v3-abstract-full').style.display = 'none'; document.getElementById('2406.15877v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">44 pages, 14 figures, 7 tables, built with love by the BigCode community :)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10323">arXiv:2406.10323</a> <span> [<a href="https://arxiv.org/pdf/2406.10323">pdf</a>, <a href="https://arxiv.org/format/2406.10323">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> GenQA: Generating Millions of Instructions from a Handful of Prompts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chen%2C+J">Jiuhai Chen</a>, <a href="/search/?searchtype=author&query=Qadri%2C+R">Rifaa Qadri</a>, <a href="/search/?searchtype=author&query=Wen%2C+Y">Yuxin Wen</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Kirchenbauer%2C+J">John Kirchenbauer</a>, <a href="/search/?searchtype=author&query=Zhou%2C+T">Tianyi Zhou</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10323v1-abstract-short" style="display: inline;"> Most public instruction finetuning datasets are relatively small compared to the closed source datasets used to train industry models. To study questions about finetuning at scale, such as curricula and learning rate cooldown schedules, there is a need for industrial-scale datasets. However, this scale necessitates a data generation process that is almost entirely automated. In this work, we study… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10323v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10323v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10323v1-abstract-full" style="display: none;"> Most public instruction finetuning datasets are relatively small compared to the closed source datasets used to train industry models. To study questions about finetuning at scale, such as curricula and learning rate cooldown schedules, there is a need for industrial-scale datasets. However, this scale necessitates a data generation process that is almost entirely automated. In this work, we study methods for generating large instruction datasets from a single prompt. With little human oversight, we get LLMs to write diverse sets of instruction examples ranging from simple completion tasks to complex multi-turn dialogs across a variety of subject areas. When finetuning a Llama-3 8B base model, our dataset meets or exceeds both WizardLM and Ultrachat on both knowledge-intensive leaderboard tasks as well as conversational evaluations. We release our dataset, the "generator" prompts that created it, and our finetuned model checkpoints. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10323v1-abstract-full').style.display = 'none'; document.getElementById('2406.10323v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9.5 pages, 6 Figures, and 3 tables in the main body. Dataset available at https://huggingface.co/datasets/tomg-group-umd/GenQA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10209">arXiv:2406.10209</a> <span> [<a href="https://arxiv.org/pdf/2406.10209">pdf</a>, <a href="https://arxiv.org/format/2406.10209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Be like a Goldfish, Don't Memorize! 
Mitigating Memorization in Generative LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hans%2C+A">Abhimanyu Hans</a>, <a href="/search/?searchtype=author&query=Wen%2C+Y">Yuxin Wen</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Kirchenbauer%2C+J">John Kirchenbauer</a>, <a href="/search/?searchtype=author&query=Kazemi%2C+H">Hamid Kazemi</a>, <a href="/search/?searchtype=author&query=Singhania%2C+P">Prajwal Singhania</a>, <a href="/search/?searchtype=author&query=Singh%2C+S">Siddharth Singh</a>, <a href="/search/?searchtype=author&query=Somepalli%2C+G">Gowthami Somepalli</a>, <a href="/search/?searchtype=author&query=Geiping%2C+J">Jonas Geiping</a>, <a href="/search/?searchtype=author&query=Bhatele%2C+A">Abhinav Bhatele</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10209v2-abstract-short" style="display: inline;"> Large language models can memorize and repeat their training data, causing privacy and copyright risks. To mitigate memorization, we introduce a subtle modification to the next-token training objective that we call the goldfish loss. During training, randomly sampled subsets of tokens are excluded from the loss computation. 
These dropped tokens are not memorized by the model, which prevents verbat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10209v2-abstract-full').style.display = 'inline'; document.getElementById('2406.10209v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10209v2-abstract-full" style="display: none;"> Large language models can memorize and repeat their training data, causing privacy and copyright risks. To mitigate memorization, we introduce a subtle modification to the next-token training objective that we call the goldfish loss. During training, randomly sampled subsets of tokens are excluded from the loss computation. These dropped tokens are not memorized by the model, which prevents verbatim reproduction of a complete chain of tokens from the training set. We run extensive experiments training billion-scale Llama-2 models, both pre-trained and trained from scratch, and demonstrate significant reductions in extractable memorization with little to no impact on downstream benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10209v2-abstract-full').style.display = 'none'; document.getElementById('2406.10209v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 8 figures, and 1 table in the main body. 
Code available at https://github.com/ahans30/goldfish-loss and checkpoints at https://huggingface.co/collections/tomg-group-umd/goldfish-loss-mitigating-memorization-in-llms-66c175becb6aab07744f7272</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05901">arXiv:2406.05901</a> <span> [<a href="https://arxiv.org/pdf/2406.05901">pdf</a>, <a href="https://arxiv.org/format/2406.05901">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Solar and Stellar Astrophysics">astro-ph.SR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Space Physics">physics.space-ph</span> </div> </div> <p class="title is-5 mathjax"> Simulation Models for Exploring Magnetic Reconnection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Shay%2C+M">Michael Shay</a>, <a href="/search/?searchtype=author&query=Adhikari%2C+S">Subash Adhikari</a>, <a href="/search/?searchtype=author&query=Beesho%2C+N">Naoki Beesho</a>, <a href="/search/?searchtype=author&query=Birn%2C+J">Joachim Birn</a>, <a href="/search/?searchtype=author&query=Buechner%2C+J">Jorg Buechner</a>, <a href="/search/?searchtype=author&query=Cassak%2C+P">Paul Cassak</a>, <a href="/search/?searchtype=author&query=Chen%2C+L">Li-Jen Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+Y">Yuxi Chen</a>, <a href="/search/?searchtype=author&query=Cozzani%2C+G">Giulia Cozzani</a>, <a href="/search/?searchtype=author&query=Drake%2C+J">Jim Drake</a>, <a href="/search/?searchtype=author&query=Guo%2C+F">Fan Guo</a>, <a href="/search/?searchtype=author&query=Hesse%2C+M">Michael Hesse</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neeraj Jain</a>, <a 
href="/search/?searchtype=author&query=Pfau-Kempf%2C+Y">Yann Pfau-Kempf</a>, <a href="/search/?searchtype=author&query=Lin%2C+Y">Yu Lin</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y">Yi-Hsin Liu</a>, <a href="/search/?searchtype=author&query=Oka%2C+M">Mitsuo Oka</a>, <a href="/search/?searchtype=author&query=Omelchenko%2C+Y+A">Yuri A. Omelchenko</a>, <a href="/search/?searchtype=author&query=Palmroth%2C+M">Minna Palmroth</a>, <a href="/search/?searchtype=author&query=Pezzi%2C+O">Oreste Pezzi</a>, <a href="/search/?searchtype=author&query=Reiff%2C+P+H">Patricia H. Reiff</a>, <a href="/search/?searchtype=author&query=Swisdak%2C+M">Marc Swisdak</a>, <a href="/search/?searchtype=author&query=Toffoletto%2C+F">Frank Toffoletto</a>, <a href="/search/?searchtype=author&query=Toth%2C+G">Gabor Toth</a>, <a href="/search/?searchtype=author&query=Wolf%2C+R+A">Richard A. Wolf</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05901v1-abstract-short" style="display: inline;"> Simulations have played a critical role in the advancement of our knowledge of magnetic reconnection. However, due to the inherently multiscale nature of reconnection, it is impossible to simulate all physics at all scales. For this reason, a wide range of simulation methods have been crafted to study particular aspects and consequences of magnetic reconnection. This chapter reviews many of these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05901v1-abstract-full').style.display = 'inline'; document.getElementById('2406.05901v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05901v1-abstract-full" style="display: none;"> Simulations have played a critical role in the advancement of our knowledge of magnetic reconnection. 
However, due to the inherently multiscale nature of reconnection, it is impossible to simulate all physics at all scales. For this reason, a wide range of simulation methods have been crafted to study particular aspects and consequences of magnetic reconnection. This chapter reviews many of these methods, laying out critical assumptions, numerical techniques, and giving examples of scientific results. Plasma models described include magnetohydrodynamics (MHD), Hall MHD, Hybrid, kinetic particle-in-cell (PIC), kinetic Vlasov, Fluid models with embedded PIC, Fluid models with direct feedback from energetic populations, and the Rice Convection Model (RCM). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05901v1-abstract-full').style.display = 'none'; document.getElementById('2406.05901v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Chapter 5.2 of ISSI Book on Magnetic Reconnection, submitted to Space Science Reviews</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01075">arXiv:2406.01075</a> <span> [<a href="https://arxiv.org/pdf/2406.01075">pdf</a>, <a href="https://arxiv.org/format/2406.01075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1063/5.0222701">10.1063/5.0222701 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Enhancing entangled two-photon absorption of Nile Red via temperature-controlled SPDC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Krsti%C4%87%2C+A">Aleksa Krstić</a>, <a href="/search/?searchtype=author&query=G%C3%A4bler%2C+T+B">Tobias Bernd Gäbler</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nitish Jain</a>, <a href="/search/?searchtype=author&query=Then%2C+P">Patrick Then</a>, <a href="/search/?searchtype=author&query=Gili%2C+V+F">Valerio Flavio Gili</a>, <a href="/search/?searchtype=author&query=Saravi%2C+S">Sina Saravi</a>, <a href="/search/?searchtype=author&query=Setzpfandt%2C+F">Frank Setzpfandt</a>, <a href="/search/?searchtype=author&query=Eggeling%2C+C">Christian Eggeling</a>, <a href="/search/?searchtype=author&query=Gr%C3%A4fe%2C+M">Markus Gräfe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01075v2-abstract-short" style="display: inline;"> Entangled two-photon absorption can enable a linear scaling of fluorescence emission with the excitation power. In comparison to classical two-photon absorption with a quadratic scaling, this can allow fluorescence imaging or photolithography with high axial resolution at minimal exposure intensities. However, most experimental studies on two-photon absorption were not able to show an unambiguous… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01075v2-abstract-full').style.display = 'inline'; document.getElementById('2406.01075v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01075v2-abstract-full" style="display: none;"> Entangled two-photon absorption can enable a linear scaling of fluorescence emission with the excitation power. In comparison to classical two-photon absorption with a quadratic scaling, this can allow fluorescence imaging or photolithography with high axial resolution at minimal exposure intensities. However, most experimental studies on two-photon absorption were not able to show an unambiguous proof of fluorescence emission driven by entangled photon pairs. On the other hand, existing theoretical models struggle to accurately predict the entangled two-photon absorption behavior of chemically complex dyes. In this paper, we introduce an approach to simulate entangled two-photon absorption in common fluorescence dyes considering their chemical properties. Our theoretical model allows a deeper understanding of experimental results and thus the occurrence of entangled two-photon absorption. In particular, we found a remarkable dependency of the absorption probability on the phase-matching temperature of the nonlinear material. 
Further, we compared results of our theoretical approach to experimental data for Nile Red. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01075v2-abstract-full').style.display = 'none'; document.getElementById('2406.01075v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> APL Quantum 2, 016108 (2025) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20237">arXiv:2405.20237</a> <span> [<a href="https://arxiv.org/pdf/2405.20237">pdf</a>, <a href="https://arxiv.org/format/2405.20237">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Training-efficient density quantum machine learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Coyle%2C+B">Brian Coyle</a>, <a href="/search/?searchtype=author&query=Cherrat%2C+E+A">El Amine Cherrat</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Nishant Jain</a>, <a 
href="/search/?searchtype=author&query=Mathur%2C+N">Natansh Mathur</a>, <a href="/search/?searchtype=author&query=Raj%2C+S">Snehal Raj</a>, <a href="/search/?searchtype=author&query=Kazdaghli%2C+S">Skander Kazdaghli</a>, <a href="/search/?searchtype=author&query=Kerenidis%2C+I">Iordanis Kerenidis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20237v1-abstract-short" style="display: inline;"> Quantum machine learning requires powerful, flexible and efficiently trainable models to be successful in solving challenging problems. In this work, we present density quantum neural networks, a learning model incorporating randomisation over a set of trainable unitaries. These models generalise quantum neural networks using parameterised quantum circuits, and allow a trade-off between expressibi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20237v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20237v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20237v1-abstract-full" style="display: none;"> Quantum machine learning requires powerful, flexible and efficiently trainable models to be successful in solving challenging problems. In this work, we present density quantum neural networks, a learning model incorporating randomisation over a set of trainable unitaries. These models generalise quantum neural networks using parameterised quantum circuits, and allow a trade-off between expressibility and efficient trainability, particularly on quantum hardware. We demonstrate the flexibility of the formalism by applying it to two recently proposed model families. The first are commuting-block quantum neural networks (QNNs) which are efficiently trainable but may be limited in expressibility. 
The second are orthogonal (Hamming-weight preserving) quantum neural networks which provide well-defined and interpretable transformations on data but are challenging to train at scale on quantum devices. Density commuting QNNs improve capacity with minimal gradient complexity overhead, and density orthogonal neural networks admit a quadratic-to-constant gradient query advantage with minimal to no performance loss. We conduct numerical experiments on synthetic translationally invariant data and MNIST image data with hyperparameter optimisation to support our findings. Finally, we discuss the connection to post-variational quantum neural networks, measurement-based quantum machine learning and the dropout mechanism. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20237v1-abstract-full').style.display = 'none'; document.getElementById('2405.20237v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages main text, 9 pages appendices. 
9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17399">arXiv:2405.17399</a> <span> [<a href="https://arxiv.org/pdf/2405.17399">pdf</a>, <a href="https://arxiv.org/format/2405.17399">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Transformers Can Do Arithmetic with the Right Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=McLeish%2C+S">Sean McLeish</a>, <a href="/search/?searchtype=author&query=Bansal%2C+A">Arpit Bansal</a>, <a href="/search/?searchtype=author&query=Stein%2C+A">Alex Stein</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/?searchtype=author&query=Kirchenbauer%2C+J">John Kirchenbauer</a>, <a href="/search/?searchtype=author&query=Bartoldson%2C+B+R">Brian R. Bartoldson</a>, <a href="/search/?searchtype=author&query=Kailkhura%2C+B">Bhavya Kailkhura</a>, <a href="/search/?searchtype=author&query=Bhatele%2C+A">Abhinav Bhatele</a>, <a href="/search/?searchtype=author&query=Geiping%2C+J">Jonas Geiping</a>, <a href="/search/?searchtype=author&query=Schwarzschild%2C+A">Avi Schwarzschild</a>, <a href="/search/?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17399v2-abstract-short" style="display: inline;"> The poor performance of transformers on arithmetic tasks seems to stem in large part from their inability to keep track of the exact position of each digit inside of a large span of digits. 
We mend this problem by adding an embedding to each digit that encodes its position relative to the start of the number. In addition to the boost these embeddings provide on their own, we show that this fix ena… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17399v2-abstract-full').style.display = 'inline'; document.getElementById('2405.17399v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17399v2-abstract-full" style="display: none;"> The poor performance of transformers on arithmetic tasks seems to stem in large part from their inability to keep track of the exact position of each digit inside of a large span of digits. We mend this problem by adding an embedding to each digit that encodes its position relative to the start of the number. In addition to the boost these embeddings provide on their own, we show that this fix enables architectural modifications such as input injection and recurrent layers to improve performance even further. With positions resolved, we can study the logical extrapolation ability of transformers. Can they solve arithmetic problems that are larger and more complex than those in their training data? We find that training on only 20 digit numbers with a single GPU for one day, we can reach state-of-the-art performance, achieving up to 99% accuracy on 100 digit addition problems. Finally, we show that these gains in numeracy also unlock improvements on other multi-step reasoning tasks including sorting and multiplication. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17399v2-abstract-full').style.display = 'none'; document.getElementById('2405.17399v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.07828">arXiv:2405.07828</a> <span> [<a href="https://arxiv.org/pdf/2405.07828">pdf</a>, <a href="https://arxiv.org/format/2405.07828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Can LLMs Help Predict Elections? 
(Counter)Evidence from the World's Largest Democracy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gujral%2C+P">Pratik Gujral</a>, <a href="/search/?searchtype=author&query=Awaldhi%2C+K">Kshitij Awaldhi</a>, <a href="/search/?searchtype=author&query=Jain%2C+N">Navya Jain</a>, <a href="/search/?searchtype=author&query=Bhandula%2C+B">Bhavuk Bhandula</a>, <a href="/search/?searchtype=author&query=Chakraborty%2C+A">Abhijnan Chakraborty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.07828v1-abstract-short" style="display: inline;"> The study of how social media affects the formation of public opinion and its influence on political results has been a popular field of inquiry. However, current approaches frequently offer a limited comprehension of the complex political phenomena, yielding inconsistent outcomes. In this work, we introduce a new method: harnessing the capabilities of Large Language Models (LLMs) to examine socia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07828v1-abstract-full').style.display = 'inline'; document.getElementById('2405.07828v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.07828v1-abstract-full" style="display: none;"> The study of how social media affects the formation of public opinion and its influence on political results has been a popular field of inquiry. However, current approaches frequently offer a limited comprehension of the complex political phenomena, yielding inconsistent outcomes. In this work, we introduce a new method: harnessing the capabilities of Large Language Models (LLMs) to examine social media data and forecast election outcomes. Our research diverges from traditional methodologies in two crucial respects. 
First, we utilize the sophisticated capabilities of foundational LLMs, which can comprehend the complex linguistic subtleties and contextual details present in social media data. Second, we focus on data from X (Twitter) in India to predict state assembly election outcomes. Our method entails sentiment analysis of election-related tweets through LLMs to forecast the actual election results, and we demonstrate the superiority of our LLM-based method against more traditional exit and opinion polls. Overall, our research offers valuable insights into the unique dynamics of Indian politics and the remarkable impact of social media in molding public attitudes within this context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07828v1-abstract-full').style.display = 'none'; document.getElementById('2405.07828v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jain%2C+N&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+N&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 
56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> 
</body> </html>