Search | arXiv e-print repository
Showing 1–50 of 384 results for author: Garg, S

Searching in archive cs. Search in all archives: /search/?searchtype=author&query=Garg%2C+S
Results sorted by announcement date (newest first); 50 results per page.
1. arXiv:2411.14299 (https://arxiv.org/abs/2411.14299) — cs.AR (Hardware Architecture)
   Title: Auto-SPICE: Leveraging LLMs for Dataset Creation via Automated SPICE Netlist Extraction from Analog Circuit Diagrams
   Authors: Jitendra Bhandari, Vineet Bhat, Yuheng He, Siddharth Garg, Hamed Rahmani, Ramesh Karri
   Abstract: Auto-SPICE is the first fully automated framework leveraging large language models (LLMs) to generate Simulation Programs with Integrated Circuit Emphasis (SPICE) netlists. It addresses a long-standing challenge in automating netlist generation for analog circuits within circuit design automation. Automating this workflow could accelerate the creation of finetuned LLMs for analog circuit design and verification. We identify key challenges in this automation and evaluate the multi-modal capabilities of state-of-the-art LLMs, particularly GPT-4, to address these issues. We propose a three-step workflow to overcome current limitations: labeling analog circuits, prompt tuning, and netlist verification. This approach aims to create an end-to-end SPICE netlist generator from circuit schematic images, tackling the long-standing hurdle of accurate netlist generation. Our framework demonstrates significant performance improvements, tested on approximately 2,100 schematics of varying complexity. We open-source this solution for community-driven development.
   Submitted 21 November, 2024; originally announced November 2024.

2. arXiv:2411.11856 (https://arxiv.org/abs/2411.11856) — cs.AR (Hardware Architecture), cs.AI (Artificial Intelligence), cs.PL (Programming Languages)
   Title: Can EDA Tool Feedback Improve Verilog Generation by LLMs?
   Authors: Jason Blocklove, Shailja Thakur, Benjamin Tan, Hammond Pearce, Siddharth Garg, Ramesh Karri
   Abstract: Traditionally, digital hardware designs are written in the Verilog hardware description language (HDL) and debugged manually by engineers. This can be time-consuming and error-prone for complex designs. Large Language Models (LLMs) are emerging as a potential tool to help generate fully functioning HDL code, but most works have focused on generation in the single-shot capacity: i.e., run and evaluate, a process that does not leverage debugging and as such does not adequately reflect a realistic development process. In this work we evaluate the ability of LLMs to leverage feedback from electronic design automation (EDA) tools to fix mistakes in their own generated Verilog. To accomplish this we present an open-source, highly customizable framework, AutoChip, which combines conversational LLMs with the output from Verilog compilers and simulations to iteratively generate and repair Verilog. To determine the success of these LLMs we leverage the VerilogEval benchmark set. We evaluate four state-of-the-art conversational LLMs, focusing on readily accessible commercial models. EDA tool feedback proved to be consistently more effective than zero-shot prompting only with GPT-4o, the most computationally complex model we evaluated. In the best case we observed a 5.8% increase in the number of successful designs with a 34.2% decrease in cost over the best zero-shot results. Mixing smaller models with this larger model at the end of the feedback iterations resulted in just as much success as GPT-4o with feedback, at a further 41.9% reduction in cost (an overall cost decrease of 89.6% over zero-shot).
   Submitted 1 November, 2024; originally announced November 2024.

3. arXiv:2411.08870 (https://arxiv.org/abs/2411.08870) — cs.CL (Computation and Language), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)
   Title: The Limited Impact of Medical Adaptation of Large Language and Vision-Language Models
   Authors: Daniel P. Jeong, Pranav Mani, Saurabh Garg, Zachary C. Lipton, Michael Oberst
   Abstract: Several recent works seek to develop foundation models specifically for medical applications, adapting general-purpose large language models (LLMs) and vision-language models (VLMs) via continued pretraining on publicly available biomedical corpora. These works typically claim that such domain-adaptive pretraining (DAPT) improves performance on downstream medical tasks, such as answering medical licensing exam questions. In this paper, we compare ten public "medical" LLMs and two VLMs against their corresponding base models, arriving at a different conclusion: all medical VLMs and nearly all medical LLMs fail to consistently improve over their base models in the zero-/few-shot prompting and supervised fine-tuning regimes for medical question-answering (QA). For instance, across all tasks and model pairs we consider in the 3-shot setting, medical LLMs only outperform their base models in 22.7% of cases, reach a (statistical) tie in 36.8% of cases, and are significantly worse than their base models in the remaining 40.5% of cases. Our conclusions are based on (i) comparing each medical model head-to-head, directly against the corresponding base model; (ii) optimizing the prompts for each model separately in zero-/few-shot prompting; and (iii) accounting for statistical uncertainty in comparisons. While these basic practices are not consistently adopted in the literature, our ablations show that they substantially impact conclusions. Meanwhile, we find that after fine-tuning on specific QA tasks, medical LLMs can show performance improvements, but the benefits do not carry over to tasks based on clinical notes. Our findings suggest that state-of-the-art general-domain models may already exhibit strong medical knowledge and reasoning capabilities, and offer recommendations to strengthen the conclusions of future studies.
   Submitted 13 November, 2024; originally announced November 2024.
   Comments: Extended version of EMNLP 2024 paper arXiv:2411.04118. Includes additional results on clinical note QA tasks and supervised fine-tuning evaluations.
4. arXiv:2411.05831 (https://arxiv.org/abs/2411.05831) — cs.AI (Artificial Intelligence), cs.CV (Computer Vision and Pattern Recognition)
   Title: To Ask or Not to Ask? Detecting Absence of Information in Vision and Language Navigation
   Authors: Savitha Sam Abraham, Sourav Garg, Feras Dayoub
   Abstract: Recent research in Vision Language Navigation (VLN) has overlooked the development of agents' inquisitive abilities, which allow them to ask clarifying questions when instructions are incomplete. This paper addresses how agents can recognize "when" they lack sufficient information, without focusing on "what" is missing, particularly in VLN tasks with vague instructions. Equipping agents with this ability enhances efficiency by reducing potential digressions and seeking timely assistance. The challenge in identifying such uncertain points is balancing between being overly cautious (high recall) and overly confident (high precision). We propose an attention-based instruction-vagueness estimation module that learns associations between instructions and the agent's trajectory. By leveraging instruction-to-path alignment information during training, the module's vagueness estimation performance improves by around 52% in terms of precision-recall balance. In our ablative experiments, we also demonstrate the effectiveness of incorporating this additional instruction-to-path attention network alongside the cross-modal attention networks within the navigator module. Our results show that the attention scores from the instruction-to-path attention network serve as better indicators for estimating vagueness.
   Submitted 5 November, 2024; originally announced November 2024.
   Comments: Accepted at WACV 2025.

5. arXiv:2411.05359 (https://arxiv.org/abs/2411.05359) — cs.CV (Computer Vision and Pattern Recognition), cs.AI (Artificial Intelligence), cs.CY (Computers and Society)
   Title: Agricultural Landscape Understanding At Country-Scale
   Authors: Radhika Dua, Nikita Saxena, Aditi Agarwal, Alex Wilson, Gaurav Singh, Hoang Tran, Ishan Deshpande, Amandeep Kaur, Gaurav Aggarwal, Chandan Nath, Arnab Basu, Vishal Batchu, Sharath Holla, Bindiya Kurle, Olana Missura, Rahul Aggarwal, Shubhika Garg, Nishi Shah, Avneet Singh, Dinesh Tewari, Agata Dondzik, Bharat Adsul, Milind Sohoni, Asim Rama Praveen, Aaryan Dangi, et al. (10 additional authors not shown)
   Abstract: Agricultural landscapes are quite complex, especially in the Global South where fields are smaller and agricultural practices are more varied. In this paper we report on our progress in digitizing the agricultural landscape (natural and man-made) in our study region of India. We use high-resolution imagery and a UNet-style segmentation model to generate the first-of-its-kind national-scale multi-class panoptic segmentation output. Through this work we have been able to identify individual fields across 151.7M hectares and to delineate key features such as water resources and vegetation. We share how this output was validated by our team and externally by downstream users, including some sample use cases that can lead to targeted data-driven decision making. We believe this dataset will contribute towards digitizing agriculture by generating the foundational baselayer.
   Submitted 8 November, 2024; originally announced November 2024.
   Comments: 34 pages, 7 tables, 15 figs.
6. arXiv:2411.04118 (https://arxiv.org/abs/2411.04118) — cs.CL (Computation and Language), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)
   Title: Medical Adaptation of Large Language and Vision-Language Models: Are We Making Progress?
   Authors: Daniel P. Jeong, Saurabh Garg, Zachary C. Lipton, Michael Oberst
   Abstract: Several recent works seek to develop foundation models specifically for medical applications, adapting general-purpose large language models (LLMs) and vision-language models (VLMs) via continued pretraining on publicly available biomedical corpora. These works typically claim that such domain-adaptive pretraining (DAPT) improves performance on downstream medical tasks, such as answering medical licensing exam questions. In this paper, we compare seven public "medical" LLMs and two VLMs against their corresponding base models, arriving at a different conclusion: all medical VLMs and nearly all medical LLMs fail to consistently improve over their base models in the zero-/few-shot prompting regime for medical question-answering (QA) tasks. For instance, across the tasks and model pairs we consider in the 3-shot setting, medical LLMs only outperform their base models in 12.1% of cases, reach a (statistical) tie in 49.8% of cases, and are significantly worse than their base models in the remaining 38.2% of cases. Our conclusions are based on (i) comparing each medical model head-to-head, directly against the corresponding base model; (ii) optimizing the prompts for each model separately; and (iii) accounting for statistical uncertainty in comparisons. While these basic practices are not consistently adopted in the literature, our ablations show that they substantially impact conclusions. Our findings suggest that state-of-the-art general-domain models may already exhibit strong medical knowledge and reasoning capabilities, and offer recommendations to strengthen the conclusions of future studies.
   Submitted 19 November, 2024; v1 submitted 6 November, 2024; originally announced November 2024.
   Comments: This version was published at EMNLP 2024 Main Conference as a Long Paper (Oral). See the extended version (arXiv:2411.08870) for additional results on QA tasks based on clinical notes and evaluations in the supervised fine-tuning regime.

7. arXiv:2411.03253 (https://arxiv.org/abs/2411.03253) — cs.LG (Machine Learning), cs.AI (Artificial Intelligence), cs.DS (Data Structures and Algorithms)
   Title: Discovering Data Structures: Nearest Neighbor Search and Beyond
   Authors: Omar Salemohamed, Laurent Charlin, Shivam Garg, Vatsal Sharan, Gregory Valiant
   Abstract: We propose a general framework for end-to-end learning of data structures. Our framework adapts to the underlying data distribution and provides fine-grained control over query and space complexity. Crucially, the data structure is learned from scratch, and does not require careful initialization or seeding with candidate data structures/algorithms. We first apply this framework to the problem of nearest neighbor search. In several settings, we are able to reverse-engineer the learned data structures and query algorithms. For 1D nearest neighbor search, the model discovers optimal distribution (in)dependent algorithms such as binary search and variants of interpolation search. In higher dimensions, the model learns solutions that resemble k-d trees in some regimes, while in others, they have elements of locality-sensitive hashing. The model can also learn useful representations of high-dimensional data and exploit them to design effective data structures. We also adapt our framework to the problem of estimating frequencies over a data stream, and believe it could also be a powerful discovery tool for new problems.
   Submitted 5 November, 2024; originally announced November 2024.
8. arXiv:2410.24019 (https://arxiv.org/abs/2410.24019) — cs.CL (Computation and Language), cs.SD (Sound), eess.AS (Audio and Speech Processing)
   Title: Speech is More Than Words: Do Speech-to-Text Translation Systems Leverage Prosody?
   Authors: Ioannis Tsiamas, Matthias Sperber, Andrew Finch, Sarthak Garg
   Abstract: The prosody of a spoken utterance, including features like stress, intonation and rhythm, can significantly affect the underlying semantics, and as a consequence can also affect its textual translation. Nevertheless, prosody is rarely studied within the context of speech-to-text translation (S2TT) systems. In particular, end-to-end (E2E) systems have been proposed as well-suited for prosody-aware translation because they have direct access to the speech signal when making translation decisions, but the understanding of whether this is successful in practice is still limited. A main challenge is the difficulty of evaluating prosody awareness in translation. To address this challenge, we introduce an evaluation methodology and a focused benchmark (named ContraProST) aimed at capturing a wide range of prosodic phenomena. Our methodology uses large language models and controllable text-to-speech (TTS) to generate contrastive examples. Through experiments in translating English speech into German, Spanish, and Japanese, we find that (a) S2TT models possess some internal representation of prosody, but the prosody signal is often not strong enough to affect the translations, (b) E2E systems outperform cascades of speech recognition and text translation systems, confirming their theoretical advantage in this regard, and (c) certain cascaded systems also capture prosodic information in the translation, but only to a lesser extent that depends on the particulars of the transcript's surface form.
   Submitted 31 October, 2024; originally announced October 2024.
   Comments: WMT 2024.

9. arXiv:2410.23571 (https://arxiv.org/abs/2410.23571) — cs.RO (Robotics)
   Title: Dual Agent Learning Based Aerial Trajectory Tracking
   Authors: Shaswat Garg, Houman Masnavi, Baris Fidan, Farrokh Janabi-Sharifi
   Abstract: This paper presents a novel reinforcement learning framework for trajectory tracking of unmanned aerial vehicles in cluttered environments using a dual-agent architecture. Traditional optimization methods for trajectory tracking face significant computational challenges and lack robustness in dynamic environments. Our approach employs deep reinforcement learning (RL) to overcome these limitations, leveraging 3D pointcloud data to perceive the environment without relying on memory-intensive obstacle representations like occupancy grids. The proposed system features two RL agents: one for predicting UAV velocities to follow a reference trajectory and another for managing collision avoidance in the presence of obstacles. This architecture ensures real-time performance and adaptability to uncertainties. We demonstrate the efficacy of our approach through simulated and real-world experiments, highlighting improvements over state-of-the-art RL and optimization-based methods. Additionally, a curriculum learning paradigm is employed to scale the algorithms to more complex environments, ensuring robust trajectory tracking and obstacle avoidance in both static and dynamic scenarios.
   Submitted 30 October, 2024; originally announced October 2024.

10. arXiv:2410.23232 (https://arxiv.org/abs/2410.23232) — cs.LG (Machine Learning)
    Title: Attribute-to-Delete: Machine Unlearning via Datamodel Matching
    Authors: Kristian Georgiev, Roy Rinberg, Sung Min Park, Shivam Garg, Andrew Ilyas, Aleksander Madry, Seth Neel
    Abstract: Machine unlearning -- efficiently removing the effect of a small "forget set" of training data on a pre-trained machine learning model -- has recently attracted significant research interest. Despite this interest, however, recent work shows that existing machine unlearning techniques do not hold up to thorough evaluation in non-convex settings. In this work, we introduce a new machine unlearning technique that exhibits strong empirical performance even in such challenging settings. Our starting point is the perspective that the goal of unlearning is to produce a model whose outputs are statistically indistinguishable from those of a model re-trained on all but the forget set. This perspective naturally suggests a reduction from the unlearning problem to that of data attribution, where the goal is to predict the effect of changing the training set on a model's outputs. Thus motivated, we propose the following meta-algorithm, which we call Datamodel Matching (DMM): given a trained model, we (a) use data attribution to predict the output of the model if it were re-trained on all but the forget set points; then (b) fine-tune the pre-trained model to match these predicted outputs. In a simple convex setting, we show how this approach provably outperforms a variety of iterative unlearning algorithms. Empirically, we use a combination of existing evaluations and a new metric based on the KL-divergence to show that even in non-convex settings, DMM achieves strong unlearning performance relative to existing algorithms. An added benefit of DMM is that it is a meta-algorithm, in the sense that future advances in data attribution translate directly into better unlearning algorithms, pointing to a clear direction for future progress in unlearning.
    Submitted 11 November, 2024; v1 submitted 30 October, 2024; originally announced October 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22606">arXiv:2410.22606</a> <span> [<a href="https://arxiv.org/pdf/2410.22606">pdf</a>, <a href="https://arxiv.org/ps/2410.22606">ps</a>, <a href="https://arxiv.org/format/2410.22606">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Testing Tensor Products of Algebraic Codes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sumegha Garg</a>, <a href="/search/cs?searchtype=author&query=Sudan%2C+M">Madhu Sudan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+G">Gabriel Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22606v1-abstract-short" style="display: inline;"> Motivated by recent advances in locally testable codes and quantum LDPCs based on robust testability of tensor product codes, we explore the local testability of tensor products of (an abstraction of) algebraic geometry codes. Such codes are parameterized by, in addition to standard parameters such as block length $n$ and dimension $k$, their genus $g$. We show that the tensor product of two algeb… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22606v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22606v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22606v1-abstract-full" style="display: none;"> Motivated by recent advances in locally testable codes and quantum LDPCs based on robust testability of tensor product codes, we explore the local testability of tensor products of (an abstraction of) algebraic geometry codes. Such codes are parameterized by, in addition to standard parameters such as block length $n$ and dimension $k$, their genus $g$. We show that the tensor product of two algebraic geometry codes is robustly locally testable provided $n = 惟((k+g)^2)$. Apart from Reed-Solomon codes, this seems to be the first explicit family of codes of super-constant dual distance that is robustly locally testable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22606v1-abstract-full').style.display = 'none'; document.getElementById('2410.22606v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22029">arXiv:2410.22029</a> <span> [<a href="https://arxiv.org/pdf/2410.22029">pdf</a>, <a href="https://arxiv.org/format/2410.22029">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Are VLMs Really Blind </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A">Ayush Singh</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Mansi Gupta</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivank Garg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22029v1-abstract-short" style="display: inline;"> Vision Language Models excel in handling a wide range of complex tasks, including Optical Character Recognition (OCR), Visual Question Answering (VQA), and advanced geometric reasoning. However, these models fail to perform well on low-level basic visual tasks which are especially easy for humans. Our goal in this work was to determine if these models are truly "blind" to geometric reasoning or if… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22029v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22029v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22029v1-abstract-full" style="display: none;"> Vision Language Models excel in handling a wide range of complex tasks, including Optical Character Recognition (OCR), Visual Question Answering (VQA), and advanced geometric reasoning. However, these models fail to perform well on low-level basic visual tasks which are especially easy for humans. Our goal in this work was to determine if these models are truly "blind" to geometric reasoning or if there are ways to enhance their capabilities in this area. Our work presents a novel automatic pipeline designed to extract key information from images in response to specific questions. Instead of just relying on direct VQA, we use question-derived keywords to create a caption that highlights important details in the image related to the question. This caption is then used by a language model to provide a precise answer to the question without requiring external fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22029v1-abstract-full').style.display = 'none'; document.getElementById('2410.22029v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
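<p class="is-size-7">The caption-then-answer pipeline described in the abstract above can be outlined as follows. Both model calls are placeholders (any VLM and LLM could be substituted), and the keyword extraction is a naive stand-in for whatever the paper's pipeline actually uses.</p> <pre><code>
# Schematic of question-driven captioning: derive keywords from the question,
# caption the image with those keywords in focus, then answer from the caption.
def extract_keywords(question: str) -> list[str]:
    stopwords = {"the", "a", "an", "is", "are", "do", "how", "many", "what", "in", "of"}
    return [w.strip("?.,").lower() for w in question.split()
            if w.strip("?.,").lower() not in stopwords]

def caption_model(image, focus: list[str]) -> str:
    # Placeholder: a real VLM would be prompted to describe the image with
    # attention to the focus terms, e.g. "Describe the circles and their overlap."
    return f"[caption of {image} focused on {', '.join(focus)}]"

def language_model(prompt: str) -> str:
    return f"[answer derived from: {prompt}]"   # placeholder LLM call

def answer(image, question: str) -> str:
    keywords = extract_keywords(question)                   # question-derived keywords
    caption = caption_model(image, keywords)                # targeted caption
    prompt = f"Caption: {caption} Question: {question} Answer concisely."
    return language_model(prompt)                           # no fine-tuning required

print(answer("two_circles.png", "How many times do the circles intersect?"))
</code></pre>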
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15314">arXiv:2410.15314</a> <span> [<a href="https://arxiv.org/pdf/2410.15314">pdf</a>, <a href="https://arxiv.org/format/2410.15314">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> KTCR: Improving Implicit Hate Detection with Knowledge Transfer driven Concept Refinement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Samarth Garg</a>, <a href="/search/cs?searchtype=author&query=Kavuri%2C+V+H">Vivek Hruday Kavuri</a>, <a href="/search/cs?searchtype=author&query=Shroff%2C+G">Gargi Shroff</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+R">Rahul Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15314v1-abstract-short" style="display: inline;"> The constant shifts in social and political contexts, driven by emerging social movements and political events, lead to new forms of hate content and previously unrecognized hate patterns that machine learning models may not have captured. Some recent literature proposes data augmentation-based techniques to enrich existing hate datasets by incorporating samples that reveal new implicit hate p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15314v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15314v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15314v1-abstract-full" style="display: none;"> The constant shifts in social and political contexts, driven by emerging social movements and political events, lead to new forms of hate content and previously unrecognized hate patterns that machine learning models may not have captured. Some recent literature proposes data augmentation-based techniques to enrich existing hate datasets by incorporating samples that reveal new implicit hate patterns. This approach aims to improve the model's performance on out-of-domain implicit hate instances. It is observed, however, that adding more augmentation samples degrades the model's performance. In this work, we propose a Knowledge Transfer-driven Concept Refinement method that distills and refines the concepts related to implicit hate samples through novel prototype alignment and concept losses, alongside data augmentation based on concept activation vectors.
Experiments with several publicly available datasets show that incorporating additional implicit samples reflecting new hate patterns through concept refinement enhances the model's performance, surpassing baseline results while maintaining cross-dataset generalization capabilities.\footnote{DISCLAIMER: This paper contains explicit statements that are potentially offensive.} <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15314v1-abstract-full').style.display = 'none'; document.getElementById('2410.15314v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 4 figures, 2 algorithms, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12124">arXiv:2410.12124</a> <span> [<a href="https://arxiv.org/pdf/2410.12124">pdf</a>, <a href="https://arxiv.org/format/2410.12124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Affordance-Centric Policy Learning: Sample Efficient and Generalisable Robot Policy Learning using Affordance-Centric Task Frames </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rana%2C+K">Krishan Rana</a>, <a href="/search/cs?searchtype=author&query=Abou-Chakra%2C+J">Jad Abou-Chakra</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sourav Garg</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+R">Robert Lee</a>, <a href="/search/cs?searchtype=author&query=Reid%2C+I">Ian Reid</a>, <a href="/search/cs?searchtype=author&query=Suenderhauf%2C+N">Niko Suenderhauf</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12124v1-abstract-short" style="display: inline;"> Affordances are central to robotic manipulation, where most tasks can be simplified to interactions with task-specific regions on objects. By focusing on these key regions, we can abstract away task-irrelevant information, simplifying the learning process, and enhancing generalisation. In this paper, we propose an affordance-centric policy-learning approach that centres and appropriately \textit{o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12124v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12124v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12124v1-abstract-full" style="display: none;"> Affordances are central to robotic manipulation, where most tasks can be simplified to interactions with task-specific regions on objects. By focusing on these key regions, we can abstract away task-irrelevant information, simplifying the learning process, and enhancing generalisation. 
In this paper, we propose an affordance-centric policy-learning approach that centres and appropriately \textit{orients} a \textit{task frame} on these affordance regions, allowing us to achieve both \textbf{intra-category invariance} -- where policies can generalise across different instances within the same object category -- and \textbf{spatial invariance} -- which enables consistent performance regardless of object placement in the environment. We propose a method to leverage existing generalist large vision models to extract and track these affordance frames, and demonstrate that our approach can learn manipulation tasks using behaviour cloning from as few as 10 demonstrations, with equivalent generalisation to an image-based policy trained on 305 demonstrations. We provide video demonstrations on our project site: https://affordance-policy.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12124v1-abstract-full').style.display = 'none'; document.getElementById('2410.12124v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Video can be found on our project website: https://affordance-policy.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07073">arXiv:2410.07073</a> <span> [<a href="https://arxiv.org/pdf/2410.07073">pdf</a>, <a href="https://arxiv.org/format/2410.07073">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Pixtral 12B </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agrawal%2C+P">Pravesh Agrawal</a>, <a href="/search/cs?searchtype=author&query=Antoniak%2C+S">Szymon Antoniak</a>, <a href="/search/cs?searchtype=author&query=Hanna%2C+E+B">Emma Bou Hanna</a>, <a href="/search/cs?searchtype=author&query=Bout%2C+B">Baptiste Bout</a>, <a href="/search/cs?searchtype=author&query=Chaplot%2C+D">Devendra Chaplot</a>, <a href="/search/cs?searchtype=author&query=Chudnovsky%2C+J">Jessica Chudnovsky</a>, <a href="/search/cs?searchtype=author&query=Costa%2C+D">Diogo Costa</a>, <a href="/search/cs?searchtype=author&query=De+Monicault%2C+B">Baudouin De Monicault</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Saurabh Garg</a>, <a href="/search/cs?searchtype=author&query=Gervet%2C+T">Theophile Gervet</a>, <a href="/search/cs?searchtype=author&query=Ghosh%2C+S">Soham Ghosh</a>, <a href="/search/cs?searchtype=author&query=H%C3%A9liou%2C+A">Amélie Héliou</a>, <a href="/search/cs?searchtype=author&query=Jacob%2C+P">Paul Jacob</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+A+Q">Albert Q.
Jiang</a>, <a href="/search/cs?searchtype=author&query=Khandelwal%2C+K">Kartik Khandelwal</a>, <a href="/search/cs?searchtype=author&query=Lacroix%2C+T">Timothée Lacroix</a>, <a href="/search/cs?searchtype=author&query=Lample%2C+G">Guillaume Lample</a>, <a href="/search/cs?searchtype=author&query=Casas%2C+D+L">Diego Las Casas</a>, <a href="/search/cs?searchtype=author&query=Lavril%2C+T">Thibaut Lavril</a>, <a href="/search/cs?searchtype=author&query=Scao%2C+T+L">Teven Le Scao</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+A">Andy Lo</a>, <a href="/search/cs?searchtype=author&query=Marshall%2C+W">William Marshall</a>, <a href="/search/cs?searchtype=author&query=Martin%2C+L">Louis Martin</a>, <a href="/search/cs?searchtype=author&query=Mensch%2C+A">Arthur Mensch</a>, <a href="/search/cs?searchtype=author&query=Muddireddy%2C+P">Pavankumar Muddireddy</a> , et al. (17 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07073v2-abstract-short" style="display: inline;"> We introduce Pixtral-12B, a 12-billion-parameter multimodal language model. Pixtral-12B is trained to understand both natural images and documents, achieving leading performance on various multimodal benchmarks, surpassing a number of larger models. Unlike many open-source models, Pixtral is also a cutting-edge text model for its size, and does not compromise on natural language performance to ex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07073v2-abstract-full').style.display = 'inline'; document.getElementById('2410.07073v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07073v2-abstract-full" style="display: none;"> We introduce Pixtral-12B, a 12-billion-parameter multimodal language model. Pixtral-12B is trained to understand both natural images and documents, achieving leading performance on various multimodal benchmarks, surpassing a number of larger models. Unlike many open-source models, Pixtral is also a cutting-edge text model for its size, and does not compromise on natural language performance to excel in multimodal tasks. Pixtral uses a new vision encoder trained from scratch, which allows it to ingest images at their natural resolution and aspect ratio. This gives users flexibility on the number of tokens used to process an image. Pixtral is also able to process any number of images in its long context window of 128K tokens. Pixtral 12B substantially outperforms other open models of similar sizes (Llama-3.2 11B \& Qwen-2-VL 7B). It also outperforms much larger open models like Llama-3.2 90B while being 7x smaller. We further contribute an open-source benchmark, MM-MT-Bench, for evaluating vision-language models in practical scenarios, and provide detailed analysis and code for standardized evaluation protocols for multimodal LLMs. Pixtral-12B is released under Apache 2.0 license.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07073v2-abstract-full').style.display = 'none'; document.getElementById('2410.07073v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05928">arXiv:2410.05928</a> <span> [<a href="https://arxiv.org/pdf/2410.05928">pdf</a>, <a href="https://arxiv.org/format/2410.05928">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Beyond Captioning: Task-Specific Prompting for Improved VLM Performance in Mathematical Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A">Ayush Singh</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Mansi Gupta</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivank Garg</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Abhinav Kumar</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+V">Vansh Agrawal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05928v1-abstract-short" style="display: inline;"> Vision-Language Models (VLMs) have transformed tasks requiring visual and reasoning abilities, such as image retrieval and Visual Question Answering (VQA). Despite their success, VLMs face significant challenges with tasks involving geometric reasoning, algebraic problem-solving, and counting. These limitations stem from difficulties in effectively integrating multiple modalities and accurately inter… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05928v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05928v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05928v1-abstract-full" style="display: none;"> Vision-Language Models (VLMs) have transformed tasks requiring visual and reasoning abilities, such as image retrieval and Visual Question Answering (VQA). Despite their success, VLMs face significant challenges with tasks involving geometric reasoning, algebraic problem-solving, and counting. These limitations stem from difficulties in effectively integrating multiple modalities and accurately interpreting geometry-related tasks. Various works claim that introducing a captioning pipeline before VQA tasks enhances performance. We incorporated this pipeline for tasks involving geometry, algebra, and counting.
We found that captioning results do not generalize; in particular, larger VLMs trained primarily on downstream QnA tasks show random performance on math-related challenges. However, we present a promising alternative: task-based prompting, which enriches the prompt with task-specific guidance. This approach proves more effective than direct captioning methods for math-heavy problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05928v1-abstract-full').style.display = 'none'; document.getElementById('2410.05928v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05915">arXiv:2410.05915</a> <span> [<a href="https://arxiv.org/pdf/2410.05915">pdf</a>, <a href="https://arxiv.org/format/2410.05915">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Give me a hint: Can LLMs take a hint to solve math problems? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agrawal%2C+V">Vansh Agrawal</a>, <a href="/search/cs?searchtype=author&query=Singla%2C+P">Pratham Singla</a>, <a href="/search/cs?searchtype=author&query=Miglani%2C+A+S">Amitoj Singh Miglani</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivank Garg</a>, <a href="/search/cs?searchtype=author&query=Mangal%2C+A">Ayush Mangal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05915v2-abstract-short" style="display: inline;"> While state-of-the-art LLMs have shown poor logical and basic mathematical reasoning, recent works try to improve their problem-solving abilities using prompting techniques. We propose giving "hints" to improve the language model's performance on advanced mathematical problems, taking inspiration from how humans approach math pedagogically. We also test robustness to adversarial hints and demonstr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05915v2-abstract-full').style.display = 'inline'; document.getElementById('2410.05915v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05915v2-abstract-full" style="display: none;"> While state-of-the-art LLMs have shown poor logical and basic mathematical reasoning, recent works try to improve their problem-solving abilities using prompting techniques. We propose giving "hints" to improve the language model's performance on advanced mathematical problems, taking inspiration from how humans approach math pedagogically. We also test robustness to adversarial hints and demonstrate the models' sensitivity to them.
We demonstrate the effectiveness of our approach by evaluating a diverse set of LLMs, presenting them with a broad set of problems of different difficulties and topics from the MATH dataset and comparing against techniques such as one-shot, few-shot, and chain of thought prompting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05915v2-abstract-full').style.display = 'none'; document.getElementById('2410.05915v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04447">arXiv:2410.04447</a> <span> [<a href="https://arxiv.org/pdf/2410.04447">pdf</a>, <a href="https://arxiv.org/format/2410.04447">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Attention Shift: Steering AI Away from Unsafe Content </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivank Garg</a>, <a href="/search/cs?searchtype=author&query=Tiwari%2C+M">Manyana Tiwari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04447v1-abstract-short" style="display: inline;"> This study investigates the generation of unsafe or harmful content in state-of-the-art generative models, focusing on methods for restricting such generations. We introduce a novel training-free approach using attention reweighing to remove unsafe concepts without additional training during inference. We compare our method against existing ablation methods, evaluating the performance on both dir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04447v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04447v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04447v1-abstract-full" style="display: none;"> This study investigates the generation of unsafe or harmful content in state-of-the-art generative models, focusing on methods for restricting such generations. We introduce a novel training-free approach using attention reweighing to remove unsafe concepts without additional training during inference. We compare our method against existing ablation methods, evaluating the performance on both direct and adversarial jailbreak prompts, using qualitative and quantitative metrics. We hypothesize potential reasons for the observed results and discuss the limitations and broader implications of content restriction.
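<p class="is-size-7">The general idea of training-free attention reweighing can be pictured as biasing the cross-attention logits of prompt tokens that match an unsafe concept before the softmax, leaving other tokens untouched. The layer choice, weighting scheme, and scale below are illustrative assumptions, not the authors' exact method.</p> <pre><code>
# Minimal numpy sketch of attention reweighing at inference time.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def reweighted_attention(Q, K, V, token_weights):
    """Cross-attention where token j's logits are biased by log(token_weights[j])."""
    scores = Q @ K.T / np.sqrt(Q.shape[-1])
    scores = scores + np.log(token_weights)   # weight 1.0 leaves a token untouched
    return softmax(scores, axis=-1) @ V

rng = np.random.default_rng(0)
d, n_img, n_txt = 8, 4, 6
Q = rng.normal(size=(n_img, d))   # image-side queries
K = rng.normal(size=(n_txt, d))   # prompt-token keys
V = rng.normal(size=(n_txt, d))   # prompt-token values

weights = np.ones(n_txt)
weights[2] = 1e-4                 # token 2 hypothetically matches an unsafe concept
out = reweighted_attention(Q, K, V, weights)   # its influence is suppressed
</code></pre>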
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04447v1-abstract-full').style.display = 'none'; document.getElementById('2410.04447v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02080">arXiv:2410.02080</a> <span> [<a href="https://arxiv.org/pdf/2410.02080">pdf</a>, <a href="https://arxiv.org/format/2410.02080">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> EMMA: Efficient Visual Alignment in Multi-Modal LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ghazanfari%2C+S">Sara Ghazanfari</a>, <a href="/search/cs?searchtype=author&query=Araujo%2C+A">Alexandre Araujo</a>, <a href="/search/cs?searchtype=author&query=Krishnamurthy%2C+P">Prashanth Krishnamurthy</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Khorrami%2C+F">Farshad Khorrami</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02080v1-abstract-short" style="display: inline;"> Multi-modal Large Language Models (MLLMs) have recently exhibited impressive general-purpose capabilities by leveraging vision foundation models to encode the core concepts of images into representations. These are then combined with instructions and processed by the language model to generate high-quality responses. Despite significant progress in enhancing the language component, challenges pers… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02080v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02080v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02080v1-abstract-full" style="display: none;"> Multi-modal Large Language Models (MLLMs) have recently exhibited impressive general-purpose capabilities by leveraging vision foundation models to encode the core concepts of images into representations. These are then combined with instructions and processed by the language model to generate high-quality responses. Despite significant progress in enhancing the language component, challenges persist in optimally fusing visual encodings within the language model for task-specific adaptability. Recent research has focused on improving this fusion through modality adaptation modules but at the cost of significantly increased model complexity and training data needs. 
In this paper, we propose EMMA (Efficient Multi-Modal Adaptation), a lightweight cross-modality module designed to efficiently fuse visual and textual encodings, generating instruction-aware visual representations for the language model. Our key contributions include: (1) an efficient early fusion mechanism that integrates vision and language representations with minimal added parameters (less than 0.2% increase in model size), (2) an in-depth interpretability analysis that sheds light on the internal mechanisms of the proposed method; (3) comprehensive experiments that demonstrate notable improvements on both specialized and general benchmarks for MLLMs. Empirical results show that EMMA boosts performance across multiple tasks by up to 9.3% while significantly improving robustness against hallucinations. Our code is available at https://github.com/SaraGhazanfari/EMMA <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02080v1-abstract-full').style.display = 'none'; document.getElementById('2410.02080v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19293">arXiv:2409.19293</a> <span> [<a href="https://arxiv.org/pdf/2409.19293">pdf</a>, <a href="https://arxiv.org/format/2409.19293">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VLAD-BuFF: Burst-aware Fast Feature Aggregation for Visual Place Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khaliq%2C+A">Ahmad Khaliq</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Ming Xu</a>, <a href="/search/cs?searchtype=author&query=Hausler%2C+S">Stephen Hausler</a>, <a href="/search/cs?searchtype=author&query=Milford%2C+M">Michael Milford</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sourav Garg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19293v1-abstract-short" style="display: inline;"> Visual Place Recognition (VPR) is a crucial component of many visual localization pipelines for embodied agents. VPR is often formulated as an image retrieval task aimed at jointly learning local features and an aggregation method. The current state-of-the-art VPR methods rely on VLAD aggregation, which can be trained to learn a weighted contribution of features through their soft assignment to cl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19293v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19293v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19293v1-abstract-full" style="display: none;"> Visual Place Recognition (VPR) is a crucial component of many visual localization pipelines for embodied agents. 
VPR is often formulated as an image retrieval task aimed at jointly learning local features and an aggregation method. The current state-of-the-art VPR methods rely on VLAD aggregation, which can be trained to learn a weighted contribution of features through their soft assignment to cluster centers. However, this process has two key limitations. Firstly, the feature-to-cluster weighting does not account for over-represented repetitive structures within a cluster, e.g., shadows or window panes; this phenomenon is also referred to as the `burstiness' problem, classically solved by discounting repetitive features before aggregation. Secondly, feature to cluster comparisons are compute-intensive for state-of-the-art image encoders with high-dimensional local features. This paper addresses these limitations by introducing VLAD-BuFF with two novel contributions: i) a self-similarity based feature discounting mechanism to learn Burst-aware features within end-to-end VPR training, and ii) Fast Feature aggregation by reducing local feature dimensions specifically through PCA-initialized learnable pre-projection. We benchmark our method on 9 public datasets, where VLAD-BuFF sets a new state of the art. Our method is able to maintain its high recall even for 12x reduced local feature dimensions, thus enabling fast feature aggregation without compromising on recall. Through additional qualitative studies, we show how our proposed weighting method effectively downweights the non-distinctive features. Source code: https://github.com/Ahmedest61/VLAD-BuFF/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19293v1-abstract-full').style.display = 'none'; document.getElementById('2409.19293v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
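<p class="is-size-7">The two contributions named above can be sketched in a few lines of numpy: discount each local feature by its summed self-similarity (a rough proxy for burstiness) and shrink feature dimension with a fixed random projection standing in for the PCA-initialized learnable pre-projection. The exact forms used by VLAD-BuFF differ; see the source code linked in the abstract.</p> <pre><code>
# Illustrative burst-aware VLAD aggregation with a pre-projection step.
import numpy as np

rng = np.random.default_rng(0)
N, D, K, d = 100, 64, 8, 16            # local features, dim, clusters, reduced dim
F = rng.normal(size=(N, D))
P = rng.normal(size=(D, d)) / np.sqrt(D)   # stand-in for PCA-initialized projection
C = rng.normal(size=(K, d))                # cluster centers

X = F @ P                                   # fast pre-projection to d dims
Xn = X / np.linalg.norm(X, axis=1, keepdims=True)

sim = np.clip(Xn @ Xn.T, 0, None)           # pairwise self-similarity
burst = sim.sum(axis=1)                     # large for repeated/bursty features
w = 1.0 / burst                             # discount weight per feature

logits = -((X[:, None, :] - C[None, :, :]) ** 2).sum(-1)
logits -= logits.max(1, keepdims=True)      # numerical stability
assign = np.exp(logits) / np.exp(logits).sum(1, keepdims=True)   # soft assignment

# Weighted residuals aggregated per cluster, then flattened and normalized.
resid = X[:, None, :] - C[None, :, :]                 # shape (N, K, d)
vlad = (w[:, None, None] * assign[..., None] * resid).sum(0)
desc = vlad.flatten() / np.linalg.norm(vlad)
print(desc.shape)                                     # (K * d,) global descriptor
</code></pre>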
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at ECCV 2024; Includes supplementary; 29 pages; 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18049">arXiv:2409.18049</a> <span> [<a href="https://arxiv.org/pdf/2409.18049">pdf</a>, <a href="https://arxiv.org/format/2409.18049">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Revisit Anything: Visual Place Recognition via Image Segment Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+K">Kartik Garg</a>, <a href="/search/cs?searchtype=author&query=Puligilla%2C+S+S">Sai Shubodh Puligilla</a>, <a href="/search/cs?searchtype=author&query=Kolathaya%2C+S">Shishir Kolathaya</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+M">Madhava Krishna</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sourav Garg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18049v1-abstract-short" style="display: inline;"> Accurately recognizing a revisited place is crucial for embodied agents to localize and navigate. This requires visual representations to be distinct, despite strong variations in camera viewpoint and scene appearance. Existing visual place recognition pipelines encode the "whole" image and search for matches. This poses a fundamental challenge in matching two images of the same place captured fro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18049v1-abstract-full').style.display = 'inline'; document.getElementById('2409.18049v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18049v1-abstract-full" style="display: none;"> Accurately recognizing a revisited place is crucial for embodied agents to localize and navigate. This requires visual representations to be distinct, despite strong variations in camera viewpoint and scene appearance. Existing visual place recognition pipelines encode the "whole" image and search for matches. This poses a fundamental challenge in matching two images of the same place captured from different camera viewpoints: "the similarity of what overlaps can be dominated by the dissimilarity of what does not overlap". We address this by encoding and searching for "image segments" instead of the whole images. We propose to use open-set image segmentation to decompose an image into `meaningful' entities (i.e., things and stuff). 
This enables us to create a novel image representation as a collection of multiple overlapping subgraphs connecting a segment with its neighboring segments, dubbed SuperSegment. Furthermore, to efficiently encode these SuperSegments into compact vector representations, we propose a novel factorized representation of feature aggregation. We show that retrieving these partial representations leads to significantly higher recognition recall than the typical whole image based retrieval. Our segments-based approach, dubbed SegVLAD, sets a new state-of-the-art in place recognition on a diverse selection of benchmark datasets, while being applicable to both generic and task-specialized image encoders. Finally, we demonstrate the potential of our method to ``revisit anything'' by evaluating our method on an object instance retrieval task, which bridges the two disparate areas of research: visual place recognition and object-goal navigation, through their common aim of recognizing goal objects specific to a place. Source code: https://github.com/AnyLoc/Revisit-Anything. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18049v1-abstract-full').style.display = 'none'; document.getElementById('2409.18049v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at ECCV 2024; Includes supplementary; 29 pages; 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16850">arXiv:2409.16850</a> <span> [<a href="https://arxiv.org/pdf/2409.16850">pdf</a>, <a href="https://arxiv.org/format/2409.16850">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robust Scene Change Detection Using Visual Foundation Models and Cross-Attention Mechanisms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chun-Jung Lin</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sourav Garg</a>, <a href="/search/cs?searchtype=author&query=Chin%2C+T">Tat-Jun Chin</a>, <a href="/search/cs?searchtype=author&query=Dayoub%2C+F">Feras Dayoub</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16850v1-abstract-short" style="display: inline;"> We present a novel method for scene change detection that leverages the robust feature extraction capabilities of a visual foundational model, DINOv2, and integrates full-image cross-attention to address key challenges such as varying lighting, seasonal variations, and viewpoint differences. 
In order to effectively learn correspondences and mis-correspondences between an image pair for the change… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16850v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16850v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16850v1-abstract-full" style="display: none;"> We present a novel method for scene change detection that leverages the robust feature extraction capabilities of a visual foundational model, DINOv2, and integrates full-image cross-attention to address key challenges such as varying lighting, seasonal variations, and viewpoint differences. In order to effectively learn correspondences and mis-correspondences between an image pair for the change detection task, we propose to a) ``freeze'' the backbone in order to retain the generality of dense foundation features, and b) employ ``full-image'' cross-attention to better tackle the viewpoint variations between the image pair. We evaluate our approach on two benchmark datasets, VL-CMU-CD and PSCD, along with their viewpoint-varied versions. Our experiments demonstrate significant improvements in F1-score, particularly in scenarios involving geometric changes between image pairs. The results indicate our method's superior generalization capabilities over existing state-of-the-art approaches, showing robustness against photometric and geometric variations as well as better overall generalization when fine-tuned to adapt to new environments. Detailed ablation studies further validate the contributions of each component in our architecture. Source code will be made publicly available upon acceptance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16850v1-abstract-full').style.display = 'none'; document.getElementById('2409.16850v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
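<p class="is-size-7">The freeze-plus-cross-attention recipe above can be pictured schematically: dense features from a frozen backbone for each image, with every patch of one image attending over all patches of the other. Here random features and an untrained attention head stand in for DINOv2 and the learned change head; nothing below is the authors' architecture.</p> <pre><code>
# Schematic full-image cross-attention between two images' patch features.
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(-1, keepdims=True))
    return e / e.sum(-1, keepdims=True)

rng = np.random.default_rng(0)
n_patches, dim = 256, 32
feat_a = rng.normal(size=(n_patches, dim))   # frozen backbone features, image A
feat_b = rng.normal(size=(n_patches, dim))   # frozen backbone features, image B

Wq, Wk, Wv = (rng.normal(size=(dim, dim)) / np.sqrt(dim) for _ in range(3))
attn = softmax((feat_a @ Wq) @ (feat_b @ Wk).T / np.sqrt(dim))
b_as_seen_from_a = attn @ (feat_b @ Wv)      # B's content aligned to A's patches

# A crude change score per patch: disagreement between A and the attended B.
change = np.linalg.norm(feat_a - b_as_seen_from_a, axis=1)
print(change.shape)   # (256,) -> reshape to the patch grid for a change mask
</code></pre>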
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10161">arXiv:2409.10161</a> <span> [<a href="https://arxiv.org/pdf/2409.10161">pdf</a>, <a href="https://arxiv.org/format/2409.10161">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SplatSim: Zero-Shot Sim2Real Transfer of RGB Manipulation Policies Using Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qureshi%2C+M+N">Mohammad Nomaan Qureshi</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sparsh Garg</a>, <a href="/search/cs?searchtype=author&query=Yandun%2C+F">Francisco Yandun</a>, <a href="/search/cs?searchtype=author&query=Held%2C+D">David Held</a>, <a href="/search/cs?searchtype=author&query=Kantor%2C+G">George Kantor</a>, <a href="/search/cs?searchtype=author&query=Silwal%2C+A">Abhisesh Silwal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10161v3-abstract-short" style="display: inline;"> Sim2Real transfer, particularly for manipulation policies relying on RGB images, remains a critical challenge in robotics due to the significant domain shift between synthetic and real-world visual data. In this paper, we propose SplatSim, a novel framework that leverages Gaussian Splatting as the primary rendering primitive to reduce the Sim2Real gap for RGB-based manipulation policies. By replac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10161v3-abstract-full').style.display = 'inline'; document.getElementById('2409.10161v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10161v3-abstract-full" style="display: none;"> Sim2Real transfer, particularly for manipulation policies relying on RGB images, remains a critical challenge in robotics due to the significant domain shift between synthetic and real-world visual data. In this paper, we propose SplatSim, a novel framework that leverages Gaussian Splatting as the primary rendering primitive to reduce the Sim2Real gap for RGB-based manipulation policies. By replacing traditional mesh representations with Gaussian Splats in simulators, SplatSim produces highly photorealistic synthetic data while maintaining the scalability and cost-efficiency of simulation. We demonstrate the effectiveness of our framework by training manipulation policies within SplatSim and deploying them in the real world in a zero-shot manner, achieving an average success rate of 86.25%, compared to 97.5% for policies trained on real-world data. 
Videos can be found on our project page: https://splatsim.github.io <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10161v3-abstract-full').style.display = 'none'; document.getElementById('2409.10161v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08179">arXiv:2408.08179</a> <span> [<a href="https://arxiv.org/pdf/2408.08179">pdf</a>, <a href="https://arxiv.org/format/2408.08179">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Machine learning empowered Modulation detection for OFDM-based signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pourranjbar%2C+A">Ali Pourranjbar</a>, <a href="/search/cs?searchtype=author&query=Kaddoum%2C+G">Georges Kaddoum</a>, <a href="/search/cs?searchtype=author&query=Mba%2C+V+A">Verdier Assoume Mba</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sahil Garg</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+S">Satinder Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08179v1-abstract-short" style="display: inline;"> We propose a blind ML-based modulation detection for OFDM-based technologies. Unlike previous works that assume an ideal environment with precise knowledge of subcarrier count and cyclic prefix location, we consider blind modulation detection while accounting for realistic environmental parameters and imperfections. Our approach employs a ResNet network to simultaneously detect the modulation type… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08179v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08179v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08179v1-abstract-full" style="display: none;"> We propose a blind ML-based modulation detection for OFDM-based technologies. Unlike previous works that assume an ideal environment with precise knowledge of subcarrier count and cyclic prefix location, we consider blind modulation detection while accounting for realistic environmental parameters and imperfections. Our approach employs a ResNet network to simultaneously detect the modulation type and accurately locate the cyclic prefix. Specifically, after eliminating the environmental impact from the signal and accurately extracting the OFDM symbols, we convert these symbols into scatter plots. Due to their unique shapes, these scatter plots are then classified using ResNet. As a result, our proposed modulation classification method can be applied to any OFDM-based technology without prior knowledge of the transmitted signal. We evaluate its performance across various modulation schemes and subcarrier numbers. 
Simulation results show that our method achieves a modulation detection accuracy exceeding $80\%$ at an SNR of $10$ dB and $95\%$ at an SNR of $25$ dB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08179v1-abstract-full').style.display = 'none'; document.getElementById('2408.08179v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05890">arXiv:2408.05890</a> <span> [<a href="https://arxiv.org/pdf/2408.05890">pdf</a>, <a href="https://arxiv.org/format/2408.05890">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3656019.3676898">10.1145/3656019.3676898 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SZKP: A Scalable Accelerator Architecture for Zero-Knowledge Proofs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Daftardar%2C+A">Alhad Daftardar</a>, <a href="/search/cs?searchtype=author&query=Reagen%2C+B">Brandon Reagen</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05890v1-abstract-short" style="display: inline;"> Zero-Knowledge Proofs (ZKPs) are an emergent paradigm in verifiable computing. In the context of applications like cloud computing, ZKPs can be used by a client (called the verifier) to verify the service provider (called the prover) is in fact performing the correct computation based on a public input. A recently prominent variant of ZKPs is zkSNARKs, generating succinct proofs that can be rapidl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05890v1-abstract-full').style.display = 'inline'; document.getElementById('2408.05890v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05890v1-abstract-full" style="display: none;"> Zero-Knowledge Proofs (ZKPs) are an emergent paradigm in verifiable computing. In the context of applications like cloud computing, ZKPs can be used by a client (called the verifier) to verify the service provider (called the prover) is in fact performing the correct computation based on a public input. A recently prominent variant of ZKPs is zkSNARKs, generating succinct proofs that can be rapidly verified by the end user. However, proof generation itself is very time consuming per transaction. Two key primitives in proof generation are the Number Theoretic Transform (NTT) and Multi-scalar Multiplication (MSM). 
These primitives are prime candidates for hardware acceleration, and prior works have looked at GPU implementations and custom RTL. However, both algorithms involve complex dataflow patterns -- standard NTTs have irregular memory accesses for butterfly computations from stage to stage, and MSMs using Pippenger's algorithm have data-dependent memory accesses for partial sum calculations. We present SZKP, a scalable accelerator framework that is the first ASIC to accelerate an entire proof on-chip by leveraging structured dataflows for both NTTs and MSMs. SZKP achieves conservative full-proof speedups of over 400$\times$, 3$\times$, and 12$\times$ over CPU, ASIC, and GPU implementations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05890v1-abstract-full').style.display = 'none'; document.getElementById('2408.05890v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the 33rd International Conference on Parallel Architectures and Compilation Techniques (PACT), 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00118">arXiv:2408.00118</a> <span> [<a href="https://arxiv.org/pdf/2408.00118">pdf</a>, <a href="https://arxiv.org/format/2408.00118">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemma 2: Improving Open Language Models at a Practical Size </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemma+Team"> Gemma Team</a>, <a href="/search/cs?searchtype=author&query=Riviere%2C+M">Morgane Riviere</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+S">Shreya Pathak</a>, <a href="/search/cs?searchtype=author&query=Sessa%2C+P+G">Pier Giuseppe Sessa</a>, <a href="/search/cs?searchtype=author&query=Hardin%2C+C">Cassidy Hardin</a>, <a href="/search/cs?searchtype=author&query=Bhupatiraju%2C+S">Surya Bhupatiraju</a>, <a href="/search/cs?searchtype=author&query=Hussenot%2C+L">Léonard Hussenot</a>, <a href="/search/cs?searchtype=author&query=Mesnard%2C+T">Thomas Mesnard</a>, <a href="/search/cs?searchtype=author&query=Shahriari%2C+B">Bobak Shahriari</a>, <a href="/search/cs?searchtype=author&query=Ram%C3%A9%2C+A">Alexandre Ramé</a>, <a href="/search/cs?searchtype=author&query=Ferret%2C+J">Johan Ferret</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peter Liu</a>, <a href="/search/cs?searchtype=author&query=Tafti%2C+P">Pouya Tafti</a>, <a href="/search/cs?searchtype=author&query=Friesen%2C+A">Abe Friesen</a>, <a href="/search/cs?searchtype=author&query=Casbon%2C+M">Michelle Casbon</a>, <a href="/search/cs?searchtype=author&query=Ramos%2C+S">Sabela Ramos</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Ravin Kumar</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+C+L">Charline Le Lan</a>, <a
href="/search/cs?searchtype=author&query=Jerome%2C+S">Sammy Jerome</a>, <a href="/search/cs?searchtype=author&query=Tsitsulin%2C+A">Anton Tsitsulin</a>, <a href="/search/cs?searchtype=author&query=Vieillard%2C+N">Nino Vieillard</a>, <a href="/search/cs?searchtype=author&query=Stanczyk%2C+P">Piotr Stanczyk</a>, <a href="/search/cs?searchtype=author&query=Girgin%2C+S">Sertan Girgin</a>, <a href="/search/cs?searchtype=author&query=Momchev%2C+N">Nikola Momchev</a>, <a href="/search/cs?searchtype=author&query=Hoffman%2C+M">Matt Hoffman</a> , et al. (173 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00118v3-abstract-short" style="display: inline;"> In this work, we introduce Gemma 2, a new addition to the Gemma family of lightweight, state-of-the-art open models, ranging in scale from 2 billion to 27 billion parameters. In this new version, we apply several known technical modifications to the Transformer architecture, such as interleaving local-global attentions (Beltagy et al., 2020a) and group-query attention (Ainslie et al., 2023). We al… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00118v3-abstract-full').style.display = 'inline'; document.getElementById('2408.00118v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00118v3-abstract-full" style="display: none;"> In this work, we introduce Gemma 2, a new addition to the Gemma family of lightweight, state-of-the-art open models, ranging in scale from 2 billion to 27 billion parameters. In this new version, we apply several known technical modifications to the Transformer architecture, such as interleaving local-global attentions (Beltagy et al., 2020a) and group-query attention (Ainslie et al., 2023). We also train the 2B and 9B models with knowledge distillation (Hinton et al., 2015) instead of next token prediction. The resulting models deliver the best performance for their size, and even offer competitive alternatives to models that are 2-3 times bigger. We release all our models to the community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00118v3-abstract-full').style.display = 'none'; document.getElementById('2408.00118v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20438">arXiv:2407.20438</a> <span>[<a href="https://arxiv.org/pdf/2407.20438">pdf</a>, <a href="https://arxiv.org/format/2407.20438">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computation and Language">cs.CL</span> <span class="tag" data-tooltip="Artificial Intelligence">cs.AI</span></div>
<p class="title is-5 mathjax">Generating Gender Alternatives in Machine Translation</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sarthak Garg</a>, <a href="/search/cs?searchtype=author&query=Gheini%2C+M">Mozhdeh Gheini</a>, <a href="/search/cs?searchtype=author&query=Emmanuel%2C+C">Clara Emmanuel</a>, <a href="/search/cs?searchtype=author&query=Likhomanenko%2C+T">Tatiana Likhomanenko</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Q">Qin Gao</a>, <a href="/search/cs?searchtype=author&query=Paulik%2C+M">Matthias Paulik</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Machine translation (MT) systems often translate terms with ambiguous gender (e.g., the English term "the nurse") into the gendered form that is most prevalent in the systems' training data (e.g., "enfermera", the Spanish term for a female nurse). This often reflects and perpetuates harmful stereotypes present in society. With MT user interfaces in mind that allow for resolving gender ambiguity in a frictionless manner, we study the problem of generating all grammatically correct gendered translation alternatives. We open-source train and test datasets for five language pairs and establish benchmarks for this task. Our key technical contribution is a novel semi-supervised solution for generating alternatives that integrates seamlessly with standard MT models and maintains high performance without requiring additional components or increasing inference overhead.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">GeBNLP 2024</span></p>
</li>
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18276">arXiv:2407.18276</a> <span>[<a href="https://arxiv.org/pdf/2407.18276">pdf</a>, <a href="https://arxiv.org/format/2407.18276">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag" data-tooltip="Artificial Intelligence">cs.AI</span></div>
<p class="title is-5 mathjax">Rome was Not Built in a Single Step: Hierarchical Prompting for LLM-based Chip Design</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nakkab%2C+A">Andre Nakkab</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S+Q">Sai Qian Zhang</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Large Language Models (LLMs) are effective in computer hardware synthesis via hardware description language (HDL) generation. However, LLM-assisted approaches for HDL generation struggle when handling complex tasks. We introduce a suite of hierarchical prompting techniques which facilitate efficient stepwise design methods, and develop a generalizable automation pipeline for the process. To evaluate these techniques, we present a benchmark set of hardware designs which have solutions with or without architectural hierarchy. Using these benchmarks, we compare various open-source and proprietary LLMs, including our own fine-tuned Code Llama-Verilog model. Our hierarchical methods automatically produce successful designs for complex hardware modules that standard flat prompting methods cannot achieve, allowing smaller open-source LLMs to compete with large proprietary models. Hierarchical prompting reduces HDL generation time and yields savings on LLM costs. Our experiments detail which LLMs are capable of which applications, and how to apply hierarchical methods in various modes. We explore case studies of generating complex cores using automatic scripted hierarchical prompts, including the first-ever LLM-designed processor with no human feedback. Tools for the Recurrent Optimization via Machine Editing (ROME) method can be found at https://github.com/ajn313/ROME-LLM</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at MLCAD '24. 10 pages, 7 figures, 5 tables</span></p>
</li>
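<p>To make the stepwise idea in the abstract above concrete, here is a hypothetical sketch of hierarchical prompting for HDL generation: leaf submodules are generated from small specs first, and the top module is then prompted against the finished submodules so no single prompt carries the whole design. The <code>llm_generate</code> callable and the prompt wording are stand-ins for illustration, not the actual ROME pipeline (see the GitHub link above for that).</p>
<pre><code class="language-python">
# Hypothetical sketch of hierarchical prompting for HDL generation.

def build_top_module(name, spec, submodules, llm_generate):
    context = "\n\n".join(submodules.values())
    prompt = (
        f"Given these already-generated Verilog submodules:\n{context}\n\n"
        f"Write module `{name}` that instantiates them to satisfy:\n{spec}"
    )
    return llm_generate(prompt)

def hierarchical_design(top_name, top_spec, sub_specs, llm_generate):
    # Step 1: generate each leaf submodule from its own small spec.
    submodules = {n: llm_generate(f"Write Verilog module `{n}`:\n{s}")
                  for n, s in sub_specs.items()}
    # Step 2: generate the top module against the finished submodules.
    return build_top_module(top_name, top_spec, submodules, llm_generate)
</code></pre>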
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16119">arXiv:2407.16119</a> <span>[<a href="https://arxiv.org/pdf/2407.16119">pdf</a>, <a href="https://arxiv.org/format/2407.16119">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Graphics">cs.GR</span> <span class="tag" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span></div>
<p class="title is-5 mathjax">Uncertainty-Aware Deep Neural Representations for Visual Analysis of Vector Field Data</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Atul Kumar</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Dutta%2C+S">Soumya Dutta</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: The widespread use of Deep Neural Networks (DNNs) has recently resulted in their application to challenging scientific visualization tasks. While advanced DNNs demonstrate impressive generalization abilities, understanding factors like prediction quality, confidence, robustness, and uncertainty is crucial. These insights aid application scientists in making informed decisions. However, DNNs lack inherent mechanisms to measure prediction uncertainty, prompting the creation of distinct frameworks for constructing robust uncertainty-aware models tailored to various visualization tasks. In this work, we develop uncertainty-aware implicit neural representations to model steady-state vector fields effectively. We comprehensively evaluate the efficacy of two principled deep uncertainty estimation techniques: (1) Deep Ensemble and (2) Monte Carlo Dropout, aimed at enabling uncertainty-informed visual analysis of features within steady vector field data. Our detailed exploration using several vector data sets indicates that uncertainty-aware models generate informative visualization results of vector field features. Furthermore, incorporating prediction uncertainty improves the resilience and interpretability of our DNN model, rendering it applicable for the analysis of non-trivial vector field data sets.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at IEEE Visualization 2024</span></p>
</li>
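<p>Of the two uncertainty techniques named in the abstract above, Monte Carlo Dropout is the simpler to illustrate: dropout is kept active at inference and the spread of repeated stochastic predictions is read as uncertainty. The PyTorch sketch below is a minimal illustration under assumed choices (network width, dropout rate, sample count); it is not the paper's architecture.</p>
<pre><code class="language-python">
# Minimal Monte Carlo Dropout sketch for an implicit neural representation
# of a steady 3D vector field: coordinates (x, y, z) -> velocity (u, v, w).
import torch
import torch.nn as nn

class VectorFieldINR(nn.Module):
    def __init__(self, hidden=128, p_drop=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(3, hidden), nn.ReLU(), nn.Dropout(p_drop),
            nn.Linear(hidden, hidden), nn.ReLU(), nn.Dropout(p_drop),
            nn.Linear(hidden, 3),
        )

    def forward(self, coords):
        return self.net(coords)

@torch.no_grad()
def mc_dropout_predict(model, coords, n_samples=32):
    model.train()  # keep dropout stochastic at inference time
    preds = torch.stack([model(coords) for _ in range(n_samples)])
    # Mean is the prediction; std is the per-component uncertainty map.
    return preds.mean(0), preds.std(0)

model = VectorFieldINR()
mean_vec, std_vec = mc_dropout_predict(model, torch.rand(1024, 3))
</code></pre>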
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02929">arXiv:2407.02929</a> <span>[<a href="https://arxiv.org/pdf/2407.02929">pdf</a>, <a href="https://arxiv.org/format/2407.02929">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Networking and Internet Architecture">cs.NI</span></div>
<p class="title is-5 mathjax">A Hybrid Reactive Routing Protocol for Decentralized UAV Networks</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivam Garg</a>, <a href="/search/cs?searchtype=author&query=Ihler%2C+A">Alexander Ihler</a>, <a href="/search/cs?searchtype=author&query=Bentley%2C+E+S">Elizabeth Serena Bentley</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+S">Sunil Kumar</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Wireless networks consisting of low size, weight, and power (SWaP) fixed-wing UAVs (FW-UAVs) are used in many applications, such as monitoring, search, and surveillance of inaccessible areas. A decentralized and autonomous approach ensures robustness to failures; the UAVs explore and sense within the area and forward their information, in a multihop manner, to nearby aerial gateway nodes. However, the unpredictable nature of the events, the relatively high speed of UAVs, and dynamic UAV trajectories cause the network topology to change significantly over time, resulting in frequent route breaks. A holistic routing approach is needed to support multiple traffic flows in these networks, providing mobility- and congestion-aware, high-quality routes when needed, with low control and computational overheads, using information collected in a distributed manner. Existing routing schemes do not address all the mentioned issues. We present a hybrid reactive routing protocol for decentralized UAV networks. Our scheme searches routes on-demand, monitors a region around the selected route (the pipe), and proactively switches to an alternative route before the current route's quality degrades below a threshold. We empirically evaluate the impact of pipe width and node density on our ability to find alternate high-quality routes within the pipe and on the overhead required to maintain the pipe. Compared to existing reactive routing schemes, our approach achieves higher throughput and reduces the number of route discoveries, overhead, and resulting flow interruptions at different traffic loads, node densities, and speeds. Despite having limited network topology information, and low overhead and route computation complexity, our proposed scheme achieves superior throughput to the proactive optimized link state routing scheme at different network and traffic settings. We also evaluate the relative performance of reactive and proactive routing schemes.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.</p>
</li>
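<p>The "pipe" mechanism in the abstract above can be pictured with a small sketch: routes confined to the monitored region are tracked, and the flow switches proactively once the active route's quality falls below a threshold. Everything here, including the bottleneck-link quality metric and the function names, is an illustrative assumption rather than the paper's exact protocol.</p>
<pre><code class="language-python">
# Hypothetical sketch of proactive route switching within a monitored "pipe".

def route_quality(route, link_quality):
    # Quality of a route = its worst link (bottleneck view; an assumption).
    return min(link_quality[(a, b)] for a, b in zip(route, route[1:]))

def maintain_flow(route, pipe_routes, link_quality, threshold=0.5):
    """Return the route to use next, switching before quality degrades too far."""
    if route_quality(route, link_quality) >= threshold:
        return route
    candidates = [r for r in pipe_routes
                  if route_quality(r, link_quality) >= threshold]
    if candidates:
        return max(candidates, key=lambda r: route_quality(r, link_quality))
    # No usable route in the pipe: fall back to on-demand route discovery.
    return None
</code></pre>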
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19549">arXiv:2406.19549</a> <span>[<a href="https://arxiv.org/pdf/2406.19549">pdf</a>, <a href="https://arxiv.org/format/2406.19549">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span></div>
<p class="title is-5 mathjax">ASCENT: Amplifying Power Side-Channel Resilience via Learning &amp; Monte-Carlo Tree Search</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhandari%2C+J">Jitendra Bhandari</a>, <a href="/search/cs?searchtype=author&query=Chowdhury%2C+A+B">Animesh Basak Chowdhury</a>, <a href="/search/cs?searchtype=author&query=Nabeel%2C+M">Mohammed Nabeel</a>, <a href="/search/cs?searchtype=author&query=Sinanoglu%2C+O">Ozgur Sinanoglu</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a>, <a href="/search/cs?searchtype=author&query=Knechtel%2C+J">Johann Knechtel</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Power side-channel (PSC) analysis is pivotal for securing cryptographic hardware. Prior art focused on securing gate-level netlists obtained as-is from chip design automation, neglecting all the complexities and potential side-effects for security arising from the design automation process. That is, automation traditionally prioritizes power, performance, and area (PPA), sidelining security. We propose a "security-first" approach, refining the logic synthesis stage to enhance the overall resilience of PSC countermeasures. We introduce ASCENT, a learning-and-search-based framework that (i) drastically reduces the time for post-design PSC evaluation and (ii) explores the security-vs-PPA design space. Thus, ASCENT enables an efficient exploration of a large number of candidate netlists, leading to an improvement in PSC resilience compared to regular PPA-optimized netlists. ASCENT is up to 120x faster than traditional PSC analysis and yields a 3.11x improvement in the PSC resilience of state-of-the-art PSC countermeasures.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at 2024 ACM/IEEE International Conference on Computer-Aided Design</span></p>
</li>
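<p>The title above pairs learning with Monte-Carlo tree search. The sketch below deliberately simplifies that search to a one-level UCB1 bandit over candidate synthesis recipes, just to show the explore/exploit loop in which a fast learned evaluator replaces slow PSC analysis; <code>synthesize</code> and <code>estimate_psc_resilience</code> are hypothetical stand-ins, and this is not ASCENT's actual algorithm.</p>
<pre><code class="language-python">
# Simplified UCB1 search over synthesis recipes (stand-in for full MCTS).
import math

def search_recipes(recipes, synthesize, estimate_psc_resilience,
                   budget=200, c=1.4):
    visits = {r: 0 for r in recipes}
    value = {r: 0.0 for r in recipes}
    for t in range(1, budget + 1):
        # UCB1: exploit high mean resilience, explore rarely-tried recipes.
        def ucb(r):
            if visits[r] == 0:
                return float("inf")
            return value[r] / visits[r] + c * math.sqrt(math.log(t) / visits[r])
        recipe = max(recipes, key=ucb)
        netlist = synthesize(recipe)               # noisy outcome per run
        reward = estimate_psc_resilience(netlist)  # learned fast evaluator
        visits[recipe] += 1
        value[recipe] += reward
    return max(recipes, key=lambda r: value[r] / max(visits[r], 1))
</code></pre>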
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17132">arXiv:2406.17132</a> <span>[<a href="https://arxiv.org/pdf/2406.17132">pdf</a>, <a href="https://arxiv.org/format/2406.17132">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Hardware Architecture">cs.AR</span></div>
<p class="title is-5 mathjax">LLM-Aided Testbench Generation and Bug Detection for Finite-State Machines</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhandari%2C+J">Jitendra Bhandari</a>, <a href="/search/cs?searchtype=author&query=Knechtel%2C+J">Johann Knechtel</a>, <a href="/search/cs?searchtype=author&query=Narayanaswamy%2C+R">Ramesh Narayanaswamy</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: This work investigates the potential of tailoring Large Language Models (LLMs), specifically GPT3.5 and GPT4, for the domain of chip testing. A key aspect of chip design is functional testing, which relies on testbenches to evaluate the functionality and coverage of Register-Transfer Level (RTL) designs. We aim to enhance testbench generation by incorporating feedback from commercial-grade Electronic Design Automation (EDA) tools into LLMs. Through iterative feedback from these tools, we refine the testbenches to achieve improved test coverage. Our case studies present promising results, demonstrating that this approach can effectively enhance test coverage. By integrating EDA tool feedback, the generated testbenches become more accurate in identifying potential issues in the RTL design. Furthermore, we extend our study to use this enhanced test coverage framework for detecting bugs in the RTL implementations.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
</li>
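<p>The iterative coverage-feedback loop described in the abstract above is straightforward to sketch: an LLM drafts a testbench, an EDA tool reports coverage, and the report is fed back into the next prompt until a target is met. The function names, prompt wording, and coverage target below are illustrative assumptions, not the paper's tooling.</p>
<pre><code class="language-python">
# Hypothetical sketch of LLM testbench refinement with EDA coverage feedback.

def refine_testbench(rtl_design, llm_generate, run_eda_coverage,
                     target=0.95, max_iters=5):
    prompt = f"Write a testbench for this RTL design:\n{rtl_design}"
    testbench = llm_generate(prompt)
    for _ in range(max_iters):
        coverage, report = run_eda_coverage(rtl_design, testbench)
        if coverage >= target:
            break
        # Feed the tool's coverage holes back to the LLM and regenerate.
        testbench = llm_generate(
            f"{prompt}\n\nPrevious testbench:\n{testbench}\n"
            f"Coverage was {coverage:.0%}. Uncovered items:\n{report}\n"
            "Extend the testbench to close these holes."
        )
    return testbench
</code></pre>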
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14532">arXiv:2406.14532</a> <span>[<a href="https://arxiv.org/pdf/2406.14532">pdf</a>, <a href="https://arxiv.org/format/2406.14532">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Machine Learning">cs.LG</span> <span class="tag" data-tooltip="Computation and Language">cs.CL</span></div>
<p class="title is-5 mathjax">RL on Incorrect Synthetic Data Scales the Efficiency of LLM Math Reasoning by Eight-Fold</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Setlur%2C+A">Amrith Setlur</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Saurabh Garg</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+N">Naman Garg</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+V">Virginia Smith</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Aviral Kumar</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Training on model-generated synthetic data is a promising approach for finetuning LLMs, but it remains unclear when it helps or hurts. In this paper, we investigate this question for math reasoning via an empirical study, followed by building a conceptual understanding of our observations. First, we find that while the typical approach of finetuning a model on synthetic correct or positive problem-solution pairs generated by capable models offers modest performance gains, sampling more correct solutions from the finetuned learner itself followed by subsequent fine-tuning on this self-generated data $\textbf{doubles}$ the efficiency of the same synthetic problems. At the same time, training on model-generated positives can amplify various spurious correlations, resulting in flat or even inverse scaling trends as the amount of data increases. Surprisingly, we find that several of these issues can be addressed if we also utilize negative responses, i.e., model-generated responses that are deemed incorrect by a final answer verifier. Crucially, these negatives must be constructed such that the training can appropriately recover the utility or advantage of each intermediate step in the negative response. With this per-step scheme, we are able to attain consistent gains over only positive data, attaining performance similar to amplifying the amount of synthetic data by $\mathbf{8 \times}$. We show that training on per-step negatives can help to unlearn spurious correlations in the positive data, and is equivalent to advantage-weighted reinforcement learning (RL), implying that it inherits robustness benefits of RL over imitating positive data alone.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
</li>
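<p>The abstract above equates its per-step scheme with advantage-weighted RL. A minimal PyTorch sketch of such an objective is shown below: each solution step's log-likelihood is weighted by an estimated advantage, so useful steps inside incorrect solutions still contribute positively and harmful steps are pushed down. How advantages are estimated is the paper's contribution and is simply assumed given here; shapes and values are illustrative.</p>
<pre><code class="language-python">
# Minimal advantage-weighted negative log-likelihood over solution steps.
import torch

def advantage_weighted_nll(step_logprobs, advantages):
    # step_logprobs: [num_steps] log-likelihood of each solution step
    # advantages:    [num_steps] estimated per-step advantage (may be negative)
    return -(advantages * step_logprobs).mean()

logp = torch.tensor([-1.2, -0.8, -2.0], requires_grad=True)
adv = torch.tensor([1.0, -0.5, 0.7])  # middle step judged harmful
loss = advantage_weighted_nll(logp, adv)
loss.backward()  # gradient raises likelihood of high-advantage steps only
</code></pre>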
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13473">arXiv:2406.13473</a> <span>[<a href="https://arxiv.org/pdf/2406.13473">pdf</a>, <a href="https://arxiv.org/format/2406.13473">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span></div>
<p class="title is-5 mathjax">Snowy Scenes, Clear Detections: A Robust Model for Traffic Light Detection in Adverse Weather Conditions</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivank Garg</a>, <a href="/search/cs?searchtype=author&query=Baghel%2C+A">Abhishek Baghel</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Amit Agarwal</a>, <a href="/search/cs?searchtype=author&query=Toshniwal%2C+D">Durga Toshniwal</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: With the rise of autonomous vehicles and advanced driver-assistance systems (ADAS), ensuring reliable object detection in all weather conditions is crucial for safety and efficiency. Adverse weather like snow, rain, and fog presents major challenges for current detection systems, often resulting in failures and potential safety risks. This paper introduces a novel framework and pipeline designed to improve object detection under such conditions, focusing on traffic signal detection where traditional methods often fail due to domain shifts caused by adverse weather. We provide a comprehensive analysis of the limitations of existing techniques. Our proposed pipeline significantly enhances detection accuracy in snow, rain, and fog. Results show a 40.8% improvement in average IoU and F1 scores compared to naive fine-tuning and a 22.4% performance increase in domain shift scenarios, such as training on artificial snow and testing on rain images.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
</li>
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12592">arXiv:2406.12592</a> <span>[<a href="https://arxiv.org/pdf/2406.12592">pdf</a>, <a href="https://arxiv.org/format/2406.12592">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span></div>
<p class="title is-5 mathjax">Unmasking the Veil: An Investigation into Concept Ablation for Privacy and Copyright Protection in Images</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivank Garg</a>, <a href="/search/cs?searchtype=author&query=Tiwari%2C+M">Manyana Tiwari</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: In this paper, we extend the study of concept ablation within pre-trained models as introduced in 'Ablating Concepts in Text-to-Image Diffusion Models' by Kumari et al. (2022). Our work focuses on reproducing the results achieved by the different variants of concept ablation proposed and validated through predefined metrics. We also introduce a novel variant of concept ablation, namely 'trademark ablation'. This variant combines the principles of memorization and instance ablation to tackle the nuanced influence of proprietary or branded elements in model outputs. Further, our research contributions include an observational analysis of the model's limitations. Moreover, we investigate the model's behavior in response to ablation leakage-inducing prompts, which aim to indirectly ablate concepts, revealing insights into the model's resilience and adaptability. We also observe the model's performance degradation on images generated by concepts far from its target ablation concept, documented in the appendix.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
</li>
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11794">arXiv:2406.11794</a> <span>[<a href="https://arxiv.org/pdf/2406.11794">pdf</a>, <a href="https://arxiv.org/format/2406.11794">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Machine Learning">cs.LG</span> <span class="tag" data-tooltip="Computation and Language">cs.CL</span></div>
<p class="title is-5 mathjax">DataComp-LM: In search of the next generation of training sets for language models</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jeffrey Li</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+A">Alex Fang</a>, <a href="/search/cs?searchtype=author&query=Smyrnis%2C+G">Georgios Smyrnis</a>, <a href="/search/cs?searchtype=author&query=Ivgi%2C+M">Maor Ivgi</a>, <a href="/search/cs?searchtype=author&query=Jordan%2C+M">Matt Jordan</a>, <a href="/search/cs?searchtype=author&query=Gadre%2C+S">Samir Gadre</a>, <a href="/search/cs?searchtype=author&query=Bansal%2C+H">Hritik Bansal</a>, <a href="/search/cs?searchtype=author&query=Guha%2C+E">Etash Guha</a>, <a href="/search/cs?searchtype=author&query=Keh%2C+S">Sedrick Keh</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+K">Kushal Arora</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Saurabh Garg</a>, <a href="/search/cs?searchtype=author&query=Xin%2C+R">Rui Xin</a>, <a href="/search/cs?searchtype=author&query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&query=Heckel%2C+R">Reinhard Heckel</a>, <a href="/search/cs?searchtype=author&query=Mercat%2C+J">Jean Mercat</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mayee Chen</a>, <a href="/search/cs?searchtype=author&query=Gururangan%2C+S">Suchin Gururangan</a>, <a href="/search/cs?searchtype=author&query=Wortsman%2C+M">Mitchell Wortsman</a>, <a href="/search/cs?searchtype=author&query=Albalak%2C+A">Alon Albalak</a>, <a href="/search/cs?searchtype=author&query=Bitton%2C+Y">Yonatan Bitton</a>, <a href="/search/cs?searchtype=author&query=Nezhurina%2C+M">Marianna Nezhurina</a>, <a href="/search/cs?searchtype=author&query=Abbas%2C+A">Amro Abbas</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+C">Cheng-Yu Hsieh</a>, <a href="/search/cs?searchtype=author&query=Ghosh%2C+D">Dhruba Ghosh</a>, <a href="/search/cs?searchtype=author&query=Gardner%2C+J">Josh Gardner</a>, et al. (34 additional authors not shown)</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: We introduce DataComp for Language Models (DCLM), a testbed for controlled dataset experiments with the goal of improving language models. As part of DCLM, we provide a standardized corpus of 240T tokens extracted from Common Crawl, effective pretraining recipes based on the OpenLM framework, and a broad suite of 53 downstream evaluations. Participants in the DCLM benchmark can experiment with data curation strategies such as deduplication, filtering, and data mixing at model scales ranging from 412M to 7B parameters. As a baseline for DCLM, we conduct extensive experiments and find that model-based filtering is key to assembling a high-quality training set. The resulting dataset, DCLM-Baseline, enables training a 7B parameter language model from scratch to 64% 5-shot accuracy on MMLU with 2.6T training tokens. Compared to MAP-Neo, the previous state-of-the-art in open-data language models, DCLM-Baseline represents a 6.6 percentage point improvement on MMLU while being trained with 40% less compute. Our baseline model is also comparable to Mistral-7B-v0.3 and Llama 3 8B on MMLU (63% &amp; 66%), and performs similarly on an average of 53 natural language understanding tasks while being trained with 6.6x less compute than Llama 3 8B. Our results highlight the importance of dataset design for training language models and offer a starting point for further research on data curation.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://www.datacomp.ai/dclm/</span></p>
</li>
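<p>The abstract above singles out model-based filtering as the key curation step. In its simplest form this means scoring every document with a learned quality classifier and keeping only the top fraction, as in the sketch below; <code>quality_score</code> is a hypothetical stand-in for such a classifier, and the toy length-based scorer is purely for demonstration.</p>
<pre><code class="language-python">
# Illustrative sketch of model-based quality filtering for a text corpus.

def filter_corpus(documents, quality_score, keep_fraction=0.1):
    # Rank documents by the classifier's score and keep the top fraction.
    scored = sorted(documents, key=quality_score, reverse=True)
    cutoff = max(1, int(len(scored) * keep_fraction))
    return scored[:cutoff]

# Toy usage: document length standing in for a trained quality classifier.
docs = ["short", "a somewhat longer document", "the longest document of all three"]
print(filter_corpus(docs, quality_score=len, keep_fraction=0.34))
</code></pre>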
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09233">arXiv:2406.09233</a> <span>[<a href="https://arxiv.org/pdf/2406.09233">pdf</a>, <a href="https://arxiv.org/format/2406.09233">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Hardware Architecture">cs.AR</span></div>
<p class="title is-5 mathjax">C2HLSC: Can LLMs Bridge the Software-to-Hardware Design Gap?</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Collini%2C+L">Luca Collini</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: High Level Synthesis (HLS) tools offer rapid hardware design from C code, but their compatibility is limited by code constructs. This paper investigates Large Language Models (LLMs) for refactoring C code into HLS-compatible formats. We present several case studies by using an LLM to rewrite C code for NIST 800-22 randomness tests, a QuickSort algorithm, and AES-128 into HLS-synthesizable C. The LLM iteratively transforms the C code guided by user prompts, implementing functions like streaming data and hardware-specific signals. This evaluation demonstrates the LLM's potential to assist hardware design by refactoring regular C code into HLS-synthesizable C code.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at The First IEEE International Workshop on LLM-Aided Design</span></p>
</li>
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05590">arXiv:2406.05590</a> <span>[<a href="https://arxiv.org/pdf/2406.05590">pdf</a>, <a href="https://arxiv.org/format/2406.05590">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag" data-tooltip="Computers and Society">cs.CY</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span></div>
<p class="title is-5 mathjax">NYU CTF Dataset: A Scalable Open-Source Benchmark Dataset for Evaluating LLMs in Offensive Security</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+M">Minghao Shao</a>, <a href="/search/cs?searchtype=author&query=Jancheska%2C+S">Sofija Jancheska</a>, <a href="/search/cs?searchtype=author&query=Udeshi%2C+M">Meet Udeshi</a>, <a href="/search/cs?searchtype=author&query=Dolan-Gavitt%2C+B">Brendan Dolan-Gavitt</a>, <a href="/search/cs?searchtype=author&query=Xi%2C+H">Haoran Xi</a>, <a href="/search/cs?searchtype=author&query=Milner%2C+K">Kimberly Milner</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Max Yin</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Krishnamurthy%2C+P">Prashanth Krishnamurthy</a>, <a href="/search/cs?searchtype=author&query=Khorrami%2C+F">Farshad Khorrami</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a>, <a href="/search/cs?searchtype=author&query=Shafique%2C+M">Muhammad Shafique</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Large Language Models (LLMs) are being deployed across various domains today. However, their capacity to solve Capture the Flag (CTF) challenges in cybersecurity has not been thoroughly evaluated. To address this, we develop a novel method to assess LLMs in solving CTF challenges by creating a scalable, open-source benchmark database specifically designed for these applications. This database includes metadata for LLM testing and adaptive learning, compiling a diverse range of CTF challenges from popular competitions. Utilizing the advanced function calling capabilities of LLMs, we build a fully automated system with an enhanced workflow and support for external tool calls. Our benchmark dataset and automated framework allow us to evaluate the performance of five LLMs, encompassing both black-box and open-source models. This work lays the foundation for future research into improving the efficiency of LLMs in interactive cybersecurity tasks and automated task planning. By providing a specialized dataset, our project offers an ideal platform for developing, testing, and refining LLM-based approaches to vulnerability detection and resolution. Evaluating LLMs on these challenges and comparing with human performance yields insights into their potential for AI-driven cybersecurity solutions to perform real-world threat management. We make our dataset open source to the public at https://github.com/NYU-LLM-CTF/LLM_CTF_Database along with our automated playground framework at https://github.com/NYU-LLM-CTF/llm_ctf_automation</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
</li>
<li class="arxiv-result">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.03592">arXiv:2406.03592</a> <span>[<a href="https://arxiv.org/pdf/2406.03592">pdf</a>, <a href="https://arxiv.org/format/2406.03592">other</a>]</span></p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computation and Language">cs.CL</span> <span class="tag" data-tooltip="Artificial Intelligence">cs.AI</span></div>
<p class="title is-5 mathjax">Measuring Retrieval Complexity in Question Answering Systems</p>
<p class="authors"><span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gabburo%2C+M">Matteo Gabburo</a>, <a href="/search/cs?searchtype=author&query=Jedema%2C+N+P">Nicolaas Paul Jedema</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddhant Garg</a>, <a href="/search/cs?searchtype=author&query=Ribeiro%2C+L+F+R">Leonardo F. R. Ribeiro</a>, <a href="/search/cs?searchtype=author&query=Moschitti%2C+A">Alessandro Moschitti</a></p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: In this paper, we investigate which questions are challenging for retrieval-based Question Answering (QA). We (i) propose retrieval complexity (RC), a novel metric conditioned on the completeness of retrieved documents, which measures the difficulty of answering questions, and (ii) propose an unsupervised pipeline to measure RC given an arbitrary retrieval system. Our proposed pipeline measures RC more accurately than alternative estimators, including LLMs, on six challenging QA benchmarks. Further investigation reveals that RC scores strongly correlate with both QA performance and expert judgment across five of the six studied benchmarks, indicating that RC is an effective measure of question difficulty. Subsequent categorization of high-RC questions shows that they span a broad set of question shapes, including multi-hop, compositional, and temporal QA, indicating that RC scores can categorize a new subset of complex questions. Our system can also have a major impact on retrieval-based systems by helping to identify more challenging questions on existing datasets.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024 (findings)</span></p>
</li>
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024 (findings)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.15842">arXiv:2405.15842</a> <span> [<a href="https://arxiv.org/pdf/2405.15842">pdf</a>, <a href="https://arxiv.org/format/2405.15842">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Model Cascading for Code: Reducing Inference Costs with Model Cascading for LLM Based Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+M">Mingzhi Zhu</a>, <a href="/search/cs?searchtype=author&query=Dolan-Gavitt%2C+B">Brendan Dolan-Gavitt</a>, <a href="/search/cs?searchtype=author&query=Shafique%2C+M">Muhammad Shafique</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.15842v1-abstract-short" style="display: inline;"> The rapid development of large language models (LLMs) has led to significant advancements in code completion tasks. While larger models have higher accuracy, they also cost much more to run. Meanwhile, model cascading has been proven effective to conserve computational resources while enhancing accuracy in LLMs on natural language generation tasks. It generates output with the smallest model in a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15842v1-abstract-full').style.display = 'inline'; document.getElementById('2405.15842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.15842v1-abstract-full" style="display: none;"> The rapid development of large language models (LLMs) has led to significant advancements in code completion tasks. While larger models have higher accuracy, they also cost much more to run. Meanwhile, model cascading has been proven effective to conserve computational resources while enhancing accuracy in LLMs on natural language generation tasks. It generates output with the smallest model in a set, and only queries the larger models when it fails to meet predefined quality criteria. However, this strategy has not been used in code completion tasks, primarily because assessing the quality of code completions differs substantially from assessing natural language, where the former relies heavily on the functional correctness. To address this, we propose letting each model generate and execute a set of test cases for their solutions, and use the test results as the cascading threshold. We show that our model cascading strategy reduces computational costs while increases accuracy compared to generating the output with a single model. We also introduce a heuristics to determine the optimal combination of the number of solutions, test cases, and test lines each model should generate, based on the budget. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09534">arXiv:2405.09534</a> <span> [<a href="https://arxiv.org/pdf/2405.09534">pdf</a>, <a href="https://arxiv.org/format/2405.09534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Learning-Based Compress-and-Forward Schemes for the Relay Channel </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ozyilkan%2C+E">Ezgi Ozyilkan</a>, <a href="/search/cs?searchtype=author&query=Carpi%2C+F">Fabrizio Carpi</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Erkip%2C+E">Elza Erkip</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.09534v1-abstract-full" style="display: inline;"> The relay channel, consisting of a source-destination pair along with a relay, is a fundamental component of cooperative communications. While the capacity of a general relay channel remains unknown, various relaying strategies, including compress-and-forward (CF), have been proposed. In CF, the relay forwards a quantized version of its received signal to the destination. Given the correlated signals at the relay and destination, distributed compression techniques, such as Wyner--Ziv coding, can be harnessed to utilize the relay-to-destination link more efficiently. Leveraging recent advances in neural network-based distributed compression, we revisit the relay channel problem and integrate a learned task-aware Wyner--Ziv compressor into a primitive relay channel with a finite-capacity out-of-band relay-to-destination link. The resulting neural CF scheme demonstrates that our compressor recovers binning of the quantized indices at the relay, mimicking the optimal asymptotic CF strategy, although no structure exploiting knowledge of the source statistics was imposed on the design. The proposed neural CF scheme, employing finite-order modulation, operates close to the rate achievable in a primitive relay channel with a Gaussian codebook. We showcase the advantages of exploiting the correlated destination signal for relay compression through various neural CF architectures that involve end-to-end training of the compressor and the demodulator components. Our learned task-oriented compressors provide the first proof-of-concept work toward interpretable and practical neural CF relaying schemes. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">journal submission under review. arXiv admin note: substantial text overlap with arXiv:2404.14594</span> </p> </li>
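<li> <p class="is-size-7">The classical Wyner--Ziv "binning" that the learned compressor is reported to recover can be illustrated with a toy scalar example (a textbook construction, not the neural architecture): the relay quantizes its signal, transmits only a bin index, and the destination disambiguates using its correlated observation.</p> <pre><code># Hedged toy illustration of Wyner-Ziv binning with scalar quantization.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=1000)                     # source signal
y_relay = x + 0.1 * rng.normal(size=1000)     # relay's noisy observation
y_dest = x + 0.1 * rng.normal(size=1000)      # destination's correlated signal

levels = np.linspace(-3, 3, 64)               # 64-level quantizer (6 bits)
q_idx = np.argmin(np.abs(y_relay[:, None] - levels[None, :]), axis=1)

n_bins = 8                                    # transmit only 3 bits: the bin index
bin_idx = q_idx % n_bins

# Destination: among the levels sharing a bin, pick the one nearest to its
# own (correlated) observation -- the side information resolves the ambiguity.
candidates = [levels[np.arange(b, 64, n_bins)] for b in range(n_bins)]
y_hat = np.array([candidates[b][np.argmin(np.abs(candidates[b] - yd))]
                  for b, yd in zip(bin_idx, y_dest)])
print("mean squared reconstruction error:", np.mean((y_relay - y_hat) ** 2))
</code></pre> </li>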
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.05792">arXiv:2405.05792</a> <span> [<a href="https://arxiv.org/pdf/2405.05792">pdf</a>, <a href="https://arxiv.org/format/2405.05792">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RoboHop: Segment-based Topological Map Representation for Open-World Visual Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sourav Garg</a>, <a href="/search/cs?searchtype=author&query=Rana%2C+K">Krishan Rana</a>, <a href="/search/cs?searchtype=author&query=Hosseinzadeh%2C+M">Mehdi Hosseinzadeh</a>, <a href="/search/cs?searchtype=author&query=Mares%2C+L">Lachlan Mares</a>, <a href="/search/cs?searchtype=author&query=S%C3%BCnderhauf%2C+N">Niko Sünderhauf</a>, <a href="/search/cs?searchtype=author&query=Dayoub%2C+F">Feras Dayoub</a>, <a href="/search/cs?searchtype=author&query=Reid%2C+I">Ian Reid</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.05792v1-abstract-full" style="display: inline;"> Mapping is crucial for spatial reasoning, planning and robot navigation. Existing approaches range from metric maps, which require precise geometry-based optimization, to purely topological maps, where image-as-node graphs lack explicit object-level reasoning and interconnectivity. In this paper, we propose a novel topological representation of an environment based on "image segments", which are semantically meaningful and open-vocabulary queryable, conferring several advantages over previous works based on pixel-level features. Unlike 3D scene graphs, we create a purely topological graph with segments as nodes, where edges are formed by a) associating segment-level descriptors between pairs of consecutive images and b) connecting neighboring segments within an image using their pixel centroids. This unveils a "continuous sense of a place", defined by inter-image persistence of segments along with their intra-image neighbours. It further enables us to represent and update segment-level descriptors through neighborhood aggregation using graph convolution layers, which improves robot localization based on segment-level retrieval. Using real-world data, we show how our proposed map representation can be used to i) generate navigation plans in the form of "hops over segments" and ii) search for target objects using natural language queries describing spatial relations of objects. Furthermore, we quantitatively analyze data association at the segment level, which underpins inter-image connectivity during mapping and segment-level localization when revisiting the same place. Finally, we show preliminary trials on segment-level `hopping' based zero-shot real-world navigation. Project page with supplementary details: oravus.github.io/RoboHop/ </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ICRA 2024; 9 pages, 8 figures</span> </p> </li>
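<li> <p class="is-size-7">A rough sketch of such a segment graph, assuming a hypothetical per-image segment representation (a descriptor vector plus a pixel centroid). It illustrates the two edge types described above, not the authors' implementation:</p> <pre><code># Hedged sketch: topological graph with image segments as nodes.
import networkx as nx
import numpy as np

def build_segment_graph(frames, match_thresh=0.8, neighbor_px=100.0):
    """frames: per-image lists of dicts {'desc': np.ndarray, 'centroid': (x, y)}."""
    g = nx.Graph()
    for t, segs in enumerate(frames):
        for i, seg in enumerate(segs):
            g.add_node((t, i), **seg)                 # node = one image segment
        # b) intra-image edges between segments with nearby pixel centroids
        for i in range(len(segs)):
            for j in range(i + 1, len(segs)):
                gap = np.linalg.norm(np.subtract(segs[i]['centroid'],
                                                 segs[j]['centroid']))
                if neighbor_px >= gap:
                    g.add_edge((t, i), (t, j), kind='intra')
        # a) inter-image edges by descriptor association with the previous image
        if t:
            for i, seg in enumerate(segs):
                for j, prev in enumerate(frames[t - 1]):
                    sim = float(seg['desc'] @ prev['desc']) / (
                        np.linalg.norm(seg['desc']) * np.linalg.norm(prev['desc']))
                    if sim >= match_thresh:
                        g.add_edge((t, i), (t - 1, j), kind='inter')
    return g   # navigation plans become "hops" along paths in g
</code></pre> </li>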
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.05343">arXiv:2405.05343</a> <span> [<a href="https://arxiv.org/pdf/2405.05343">pdf</a>, <a href="https://arxiv.org/format/2405.05343">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Distributed Least Squares in Small Space via Sketching and Bias Reduction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sachin Garg</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+K">Kevin Tan</a>, <a href="/search/cs?searchtype=author&query=Derezi%C5%84ski%2C+M">Michał Dereziński</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.05343v1-abstract-full" style="display: inline;"> Matrix sketching is a powerful tool for reducing the size of large data matrices. Yet there are fundamental limitations to this size reduction when we want to recover an accurate estimator for a task such as least squares regression. We show that these limitations can be circumvented in the distributed setting by designing sketching methods that minimize the bias of the estimator, rather than its error. In particular, we give a sparse sketching method running in optimal space and current matrix multiplication time, which recovers a nearly-unbiased least squares estimator using two passes over the data. This leads to new communication-efficient distributed averaging algorithms for least squares and related tasks, which directly improve on several prior approaches. Our key novelty is a new bias analysis for sketched least squares, giving a sharp characterization of its dependence on the sketch sparsity. The techniques include new higher-moment restricted Bai-Silverstein inequalities, which are of independent interest to the non-asymptotic analysis of deterministic equivalents for random matrices that arise from sketching. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li>
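<li> <p class="is-size-7">The sketch-and-average pattern behind such distributed estimators can be illustrated with a dense Gaussian sketch as a stand-in (the paper's sparse, bias-reduced sketch and its analysis are not reproduced here):</p> <pre><code># Hedged numpy illustration: averaging nearly-unbiased sketched solutions.
import numpy as np

rng = np.random.default_rng(1)
n, d, m, k = 4000, 20, 8, 200                    # rows, cols, workers, sketch size
A = rng.normal(size=(n, d))
x_true = rng.normal(size=d)
b = A @ x_true + 0.1 * rng.normal(size=n)

estimates = []
for _ in range(m):                               # each worker sketches independently
    S = rng.normal(size=(k, n)) / np.sqrt(k)     # dense Gaussian sketch (stand-in)
    x_hat, *_ = np.linalg.lstsq(S @ A, S @ b, rcond=None)
    estimates.append(x_hat)

x_avg = np.mean(estimates, axis=0)               # communication: one d-vector per worker
print("single-sketch error:", np.linalg.norm(estimates[0] - x_true))
print("averaged error:     ", np.linalg.norm(x_avg - x_true))
</code></pre> </li>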
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04678">arXiv:2405.04678</a> <span> [<a href="https://arxiv.org/pdf/2405.04678">pdf</a>, <a href="https://arxiv.org/format/2405.04678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Pipe Routing with Topology Control for UAV Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Devaraju%2C+S">Shreyas Devaraju</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shivam Garg</a>, <a href="/search/cs?searchtype=author&query=Ihler%2C+A">Alexander Ihler</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+S">Sunil Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.04678v1-abstract-full" style="display: inline;"> Routing protocols help in transmitting the sensed data from UAVs monitoring the targets (called target UAVs) to the base station (BS). However, the highly dynamic nature of an autonomous, decentralized UAV network leads to frequent route breaks or traffic disruptions. Traditional routing schemes cannot quickly adapt to dynamic UAV networks and/or incur large control overhead and delays. To establish stable, high-quality routes from target UAVs to the BS, we design a hybrid reactive routing scheme called pipe routing that is mobility-, congestion-, and energy-aware. The pipe routing scheme discovers routes on-demand and proactively switches to alternate high-quality routes within a limited region around the active routes (called the pipe) when needed, reducing the number of route breaks and increasing data throughput. We then design a novel topology control-based pipe routing scheme to maintain robust connectivity in the pipe region around the active routes, leading to improved route stability and increased throughput with minimal impact on the coverage performance of the UAV network. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li>
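<li> <p class="is-size-7">A toy sketch of the "pipe" idea under an assumed data model (routes as lists of 2-D node positions and a caller-supplied <code>quality</code> function); the actual protocol's route discovery and control messaging are not shown:</p> <pre><code># Hedged sketch: prefer alternate routes that stay inside the pipe region.
import math

def dist(a, b):
    return math.hypot(a[0] - b[0], a[1] - b[1])

def in_pipe(route, active, radius):
    """True if every node of `route` lies within `radius` of the active route."""
    return all(radius >= min(dist(u, v) for v in active) for u in route)

def select_route(active, candidates, quality, radius=50.0, margin=1.2):
    pipe = [r for r in candidates if in_pipe(r, active, radius)]
    best = max(pipe, key=quality, default=active)
    # switch only on a clear improvement, to avoid route flapping
    return best if quality(best) > margin * quality(active) else active
</code></pre> </li>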
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.02326">arXiv:2405.02326</a> <span> [<a href="https://arxiv.org/pdf/2405.02326">pdf</a>, <a href="https://arxiv.org/format/2405.02326">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating LLMs for Hardware Design and Test </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Blocklove%2C+J">Jason Blocklove</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a>, <a href="/search/cs?searchtype=author&query=Pearce%2C+H">Hammond Pearce</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.02326v1-abstract-full" style="display: inline;"> Large Language Models (LLMs) have demonstrated capabilities for producing code in Hardware Description Languages (HDLs). However, most of the focus remains on their abilities to write functional code, not test code. The hardware design process consists of both design and test, and so eschewing validation and verification leaves considerable potential benefit unexplored, given that a design and test framework may allow for progress towards full automation of the digital design pipeline. In this work, we perform one of the first studies exploring how an LLM can both design and test hardware modules from provided specifications. Using a suite of 8 representative benchmarks, we examined the capabilities and limitations of state-of-the-art conversational LLMs when producing Verilog for functional and verification purposes. We taped out the benchmarks on a Skywater 130nm shuttle and received the functional chip. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01858">arXiv:2405.01858</a> <span> [<a href="https://arxiv.org/pdf/2405.01858">pdf</a>, <a href="https://arxiv.org/format/2405.01858">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> SUKHSANDESH: An Avatar Therapeutic Question Answering Platform for Sexual Education in Rural India </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+S+M">Salam Michael Singh</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S+K">Shubhmoy Kumar Garg</a>, <a href="/search/cs?searchtype=author&query=Misra%2C+A">Amitesh Misra</a>, <a href="/search/cs?searchtype=author&query=Seth%2C+A">Aaditeshwar Seth</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+T">Tanmoy Chakraborty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.01858v1-abstract-full" style="display: inline;"> Sexual education aims to foster a healthy lifestyle in terms of emotional, mental and social well-being. In countries like India, where adolescents form the largest demographic group, they face significant vulnerabilities concerning sexual health. Unfortunately, sexual education is often stigmatized, creating barriers to providing essential counseling and information to this at-risk population. Consequently, issues such as early pregnancy, unsafe abortions, sexually transmitted infections, and sexual violence become prevalent. Our current proposal aims to provide a safe and trustworthy platform for sexual education to the vulnerable rural Indian population, thereby fostering the healthy and overall growth of the nation. In this regard, we strive towards designing SUKHSANDESH, a multi-staged AI-based Question Answering platform for sexual education tailored to rural India, adhering to safety guardrails and regional language support. By utilizing information retrieval techniques and large language models, SUKHSANDESH will deliver effective responses to user queries. We also propose to anonymise the dataset to mitigate safety risks and to set AI guardrails against any harmful or unwanted response generation. Moreover, an innovative feature of our proposal involves integrating ``avatar therapy'' with SUKHSANDESH. This feature will convert AI-generated responses into real-time audio delivered by an animated avatar speaking regional Indian languages. This approach aims to foster empathy and connection, which is particularly beneficial for individuals with limited literacy skills. Partnering with Gram Vaani, an industry leader, we will deploy SUKHSANDESH to address sexual education needs in rural India. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01073">arXiv:2405.01073</a> <span> [<a href="https://arxiv.org/pdf/2405.01073">pdf</a>, <a href="https://arxiv.org/format/2405.01073">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Poisoning Attacks on Federated Learning for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sonakshi Garg</a>, <a href="/search/cs?searchtype=author&query=J%C3%B6nsson%2C+H">Hugo Jönsson</a>, <a href="/search/cs?searchtype=author&query=Kalander%2C+G">Gustav Kalander</a>, <a href="/search/cs?searchtype=author&query=Nilsson%2C+A">Axel Nilsson</a>, <a href="/search/cs?searchtype=author&query=Pirange%2C+B">Bhhaanu Pirange</a>, <a href="/search/cs?searchtype=author&query=Valadi%2C+V">Viktor Valadi</a>, <a href="/search/cs?searchtype=author&query=%C3%96stman%2C+J">Johan Östman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.01073v1-abstract-full" style="display: inline;"> Federated Learning (FL) is a decentralized learning paradigm, enabling parties to collaboratively train models while keeping their data confidential. Within autonomous driving, it brings the potential of reducing data storage costs, reducing bandwidth requirements, and accelerating learning. FL is, however, susceptible to poisoning attacks. In this paper, we introduce two novel poisoning attacks on FL tailored to regression tasks within autonomous driving: FLStealth and Off-Track Attack (OTA). FLStealth, an untargeted attack, aims at providing model updates that deteriorate the global model performance while appearing benign. OTA, on the other hand, is a targeted attack with the objective to change the global model's behavior when exposed to a certain trigger. We demonstrate the effectiveness of our attacks by conducting comprehensive experiments pertaining to the task of vehicle trajectory prediction. In particular, we show that, among five different untargeted attacks, FLStealth is the most successful at bypassing the considered defenses employed by the server. For OTA, we demonstrate the inability of common defense strategies to mitigate the attack, highlighting the critical need for new defensive mechanisms against targeted attacks within FL for autonomous driving. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to SCAI2024</span> </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16137">arXiv:2404.16137</a> <span> [<a href="https://arxiv.org/pdf/2404.16137">pdf</a>, <a href="https://arxiv.org/ps/2404.16137">ps</a>, <a href="https://arxiv.org/format/2404.16137">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/SPAWC60668.2024.10694070">10.1109/SPAWC60668.2024.10694070 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Learned Pulse Shaping Design for PAPR Reduction in DFT-s-OFDM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Carpi%2C+F">Fabrizio Carpi</a>, <a href="/search/cs?searchtype=author&query=Rostami%2C+S">Soheil Rostami</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+J">Joonyoung Cho</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Erkip%2C+E">Elza Erkip</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C+J">Charlie Jianzhong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2404.16137v1-abstract-full" style="display: inline;"> High peak-to-average power ratio (PAPR) is one of the main factors limiting cell coverage for cellular systems, especially in the uplink direction. Discrete Fourier transform spread orthogonal frequency division multiplexing (DFT-s-OFDM) with spectrally-extended frequency-domain spectrum shaping (FDSS) is one of the efficient techniques deployed to lower the PAPR of the uplink waveforms. In this work, we propose a machine learning-based framework to determine the FDSS filter, optimizing a tradeoff between the symbol error rate (SER), the PAPR, and the spectral flatness requirements. Our end-to-end optimization framework considers multiple important design constraints, including the Nyquist zero-ISI (inter-symbol interference) condition. The numerical results show that learned FDSS filters lower the PAPR compared to conventional baselines, with minimal SER degradation. Tuning the parameters of the optimization also helps us understand the fundamental limitations and characteristics of the FDSS filters for PAPR reduction. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, under review</span> </p> </li>
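<li> <p class="is-size-7">The quantities being traded off can be reproduced with a toy DFT-s-OFDM symbol and a fixed, hand-picked FDSS weighting (the learned filter and the spectral extension are not shown; the shaping function below is an arbitrary choice for illustration):</p> <pre><code># Hedged toy example: measure the PAPR of an FDSS-shaped DFT-s-OFDM waveform.
import numpy as np

rng = np.random.default_rng(0)
n_data, n_fft = 64, 256
qpsk = (rng.choice([-1, 1], n_data) + 1j * rng.choice([-1, 1], n_data)) / np.sqrt(2)

freq = np.fft.fft(qpsk)                   # DFT spreading of the data symbols
k = np.arange(n_data)
fdss = 0.5 + 0.5 * np.cos(2 * np.pi * (k - n_data / 2) / n_data)  # toy filter
shaped = freq * fdss                      # frequency-domain spectrum shaping

grid = np.zeros(n_fft, dtype=complex)
grid[:n_data] = shaped                    # subcarrier mapping
x = np.fft.ifft(grid) * n_fft             # OFDM modulation (time domain)

papr_db = 10 * np.log10(np.max(np.abs(x) ** 2) / np.mean(np.abs(x) ** 2))
print(f"PAPR: {papr_db:.2f} dB")          # compare against fdss = 1 (no shaping)
</code></pre> </li>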
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14758">arXiv:2404.14758</a> <span> [<a href="https://arxiv.org/pdf/2404.14758">pdf</a>, <a href="https://arxiv.org/format/2404.14758">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Second-order Information Promotes Mini-Batch Robustness in Variance-Reduced Gradients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+S">Sachin Garg</a>, <a href="/search/cs?searchtype=author&query=Berahas%2C+A+S">Albert S. Berahas</a>, <a href="/search/cs?searchtype=author&query=Derezi%C5%84ski%2C+M">Michał Dereziński</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2404.14758v1-abstract-full" style="display: inline;"> We show that, for finite-sum minimization problems, incorporating partial second-order information of the objective function can dramatically improve the robustness to mini-batch size of variance-reduced stochastic gradient methods, making them more scalable while retaining their benefits over traditional Newton-type approaches. We demonstrate this phenomenon on a prototypical stochastic second-order algorithm, called Mini-Batch Stochastic Variance-Reduced Newton ($\texttt{Mb-SVRN}$), which combines variance-reduced gradient estimates with access to an approximate Hessian oracle. In particular, we show that when the data size $n$ is sufficiently large, i.e., $n\gg \alpha^2\kappa$, where $\kappa$ is the condition number and $\alpha$ is the Hessian approximation factor, then $\texttt{Mb-SVRN}$ achieves a fast linear convergence rate that is independent of the gradient mini-batch size $b$, as long as $b$ is in the range between $1$ and $b_{\max}=O(n/(\alpha\log n))$. Only after the mini-batch size is increased past this critical point $b_{\max}$ does the method begin to transition into a standard Newton-type algorithm, which is much more sensitive to the Hessian approximation quality. We demonstrate this phenomenon empirically on benchmark optimization tasks, showing that, after tuning the step size, the convergence rate of $\texttt{Mb-SVRN}$ remains fast for a wide range of mini-batch sizes, and that the dependence of the phase transition point $b_{\max}$ on the Hessian approximation factor $\alpha$ aligns with our theoretical predictions. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 65K05; 90C06; 90C30 </p> </li>
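<li> <p class="is-size-7">The kind of update at the heart of such a method, an SVRG-style variance-reduced gradient preconditioned by an approximate Hessian, can be sketched as follows (the step size and loop schedule are assumptions, not the paper's exact Mb-SVRN specification):</p> <pre><code># Hedged sketch: one variance-reduced, Hessian-preconditioned inner step.
import numpy as np

def mb_svrn_step(w, w_ref, grad_full_ref, grad_batch, hess_inv_approx, lr=1.0):
    """grad_batch(v) must evaluate the same mini-batch at both w and w_ref;
    grad_full_ref is the full gradient at the reference point w_ref."""
    g = grad_batch(w) - grad_batch(w_ref) + grad_full_ref   # SVRG-style estimate
    return w - lr * (hess_inv_approx @ g)                   # approximate Newton step
</code></pre> </li>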
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14594">arXiv:2404.14594</a> <span> [<a href="https://arxiv.org/pdf/2404.14594">pdf</a>, <a href="https://arxiv.org/format/2404.14594">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Neural Compress-and-Forward for the Relay Channel </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ozyilkan%2C+E">Ezgi Ozyilkan</a>, <a href="/search/cs?searchtype=author&query=Carpi%2C+F">Fabrizio Carpi</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Erkip%2C+E">Elza Erkip</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2404.14594v1-abstract-full" style="display: inline;"> The relay channel, consisting of a source-destination pair and a relay, is a fundamental component of cooperative communications. While the capacity of a general relay channel remains unknown, various relaying strategies, including compress-and-forward (CF), have been proposed. For CF, given the correlated signals at the relay and destination, distributed compression techniques, such as Wyner-Ziv coding, can be harnessed to utilize the relay-to-destination link more efficiently. In light of the recent advancements in neural network-based distributed compression, we revisit the relay channel problem, where we integrate a learned one-shot Wyner--Ziv compressor into a primitive relay channel with a finite-capacity and orthogonal (or out-of-band) relay-to-destination link. The resulting neural CF scheme demonstrates that our task-oriented compressor recovers "binning" of the quantized indices at the relay, mimicking the optimal asymptotic CF strategy, although no structure exploiting knowledge of the source statistics was imposed on the design. We show that the proposed neural CF scheme, employing finite-order modulation, operates close to the capacity of a primitive relay channel that assumes a Gaussian codebook. Our learned compressor provides the first proof-of-concept work toward a practical neural CF relaying scheme. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">in submission, under review</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Garg%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Garg%2C+S&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Garg%2C+S&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Garg%2C+S&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Garg%2C+S&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Garg%2C+S&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>