CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 98 results for author: <span class="mathjax">Majumdar, S</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Majumdar%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Majumdar, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Majumdar%2C+S&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Majumdar, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Majumdar%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Majumdar%2C+S&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Majumdar%2C+S&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15396">arXiv:2501.15396</a> <span> [<a href="https://arxiv.org/pdf/2501.15396">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag 
is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Foundations of a Knee Joint Digital Twin from qMRI Biomarkers for Osteoarthritis and Knee Replacement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hoyer%2C+G">Gabrielle Hoyer</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+K+T">Kenneth T Gao</a>, <a href="/search/cs?searchtype=author&query=Gassert%2C+F+G">Felix G Gassert</a>, <a href="/search/cs?searchtype=author&query=Luitjens%2C+J">Johanna Luitjens</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+F">Fei Jiang</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sharmila Majumdar</a>, <a href="/search/cs?searchtype=author&query=Pedoia%2C+V">Valentina Pedoia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15396v1-abstract-short" style="display: inline;"> This study forms the basis of a digital twin system of the knee joint, using advanced quantitative MRI (qMRI) and machine learning to advance precision health in osteoarthritis (OA) management and knee replacement (KR) prediction. We combined deep learning-based segmentation of knee joint structures with dimensionality reduction to create an embedded feature space of imaging biomarkers. 
Through cr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15396v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15396v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15396v1-abstract-full" style="display: none;"> This study forms the basis of a digital twin system of the knee joint, using advanced quantitative MRI (qMRI) and machine learning to advance precision health in osteoarthritis (OA) management and knee replacement (KR) prediction. We combined deep learning-based segmentation of knee joint structures with dimensionality reduction to create an embedded feature space of imaging biomarkers. Through cross-sectional cohort analysis and statistical modeling, we identified specific biomarkers, including variations in cartilage thickness and medial meniscus shape, that are significantly associated with OA incidence and KR outcomes. Integrating these findings into a comprehensive framework represents a considerable step toward personalized knee-joint digital twins, which could enhance therapeutic strategies and inform clinical decision-making in rheumatological care. This versatile and reliable infrastructure has the potential to be extended to broader clinical applications in precision health. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15396v1-abstract-full').style.display = 'none'; document.getElementById('2501.15396v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This manuscript builds on an earlier preprint version available on Research Square: https://doi.org/10.21203/rs.3.rs-4317958/v1</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13376">arXiv:2501.13376</a> <span> [<a href="https://arxiv.org/pdf/2501.13376">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scalable Evaluation Framework for Foundation Models in Musculoskeletal MRI Bridging Computational Innovation with Clinical Utility </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hoyer%2C+G">Gabrielle Hoyer</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+M+W">Michelle W Tong</a>, <a href="/search/cs?searchtype=author&query=Bhattacharjee%2C+R">Rupsa Bhattacharjee</a>, <a href="/search/cs?searchtype=author&query=Pedoia%2C+V">Valentina Pedoia</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sharmila Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13376v1-abstract-short" style="display: inline;"> Foundation models hold transformative potential for medical imaging, but their clinical utility requires rigorous evaluation to address their strengths and limitations. 
This study introduces an evaluation framework for assessing the clinical impact and translatability of SAM, MedSAM, and SAM2, using musculoskeletal MRI as a case study. We tested these models across zero-shot and finetuned paradigm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13376v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13376v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13376v1-abstract-full" style="display: none;"> Foundation models hold transformative potential for medical imaging, but their clinical utility requires rigorous evaluation to address their strengths and limitations. This study introduces an evaluation framework for assessing the clinical impact and translatability of SAM, MedSAM, and SAM2, using musculoskeletal MRI as a case study. We tested these models across zero-shot and finetuned paradigms to assess their ability to process diverse anatomical structures and effectuate clinically reliable biomarkers, including cartilage thickness, muscle volume, and disc height. We engineered a modular pipeline emphasizing scalability, clinical relevance, and workflow integration, reducing manual effort and aligning validation with end-user expectations. Hierarchical modeling revealed how dataset mixing, anatomical complexity, and MRI acquisition parameters influence performance, providing insights into the role of imaging refinements in improving segmentation accuracy. This work demonstrates how clinically focused evaluations can connect computational advancements with tangible applications, creating a pathway for foundation models to address medical challenges. By emphasizing interdisciplinary collaboration and aligning technical innovation with clinical priorities, our framework provides a roadmap for advancing machine learning technologies into scalable and impactful biomedical solutions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13376v1-abstract-full').style.display = 'none'; document.getElementById('2501.13376v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20875">arXiv:2412.20875</a> <span> [<a href="https://arxiv.org/pdf/2412.20875">pdf</a>, <a href="https://arxiv.org/format/2412.20875">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Attention Is All You Need For Mixture-of-Depths Routing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gadhikar%2C+A">Advait Gadhikar</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S+K">Souptik Kumar Majumdar</a>, <a href="/search/cs?searchtype=author&query=Popp%2C+N">Niclas Popp</a>, <a href="/search/cs?searchtype=author&query=Saranrittichai%2C+P">Piyapat Saranrittichai</a>, <a href="/search/cs?searchtype=author&query=Rapp%2C+M">Martin Rapp</a>, <a href="/search/cs?searchtype=author&query=Schott%2C+L">Lukas Schott</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20875v1-abstract-short" style="display: inline;"> Advancements in deep learning are driven by training models with increasingly larger numbers of parameters, which in turn heightens the computational demands. 
To address this issue, Mixture-of-Depths (MoD) models have been proposed to dynamically assign computations only to the most relevant parts of the inputs, thereby enabling the deployment of large-parameter models with high efficiency during… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20875v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20875v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20875v1-abstract-full" style="display: none;"> Advancements in deep learning are driven by training models with increasingly larger numbers of parameters, which in turn heightens the computational demands. To address this issue, Mixture-of-Depths (MoD) models have been proposed to dynamically assign computations only to the most relevant parts of the inputs, thereby enabling the deployment of large-parameter models with high efficiency during inference and training. These MoD models utilize a routing mechanism to determine which tokens should be processed by a layer, or skipped. However, conventional MoD models employ additional network layers specifically for the routing which are difficult to train, and add complexity and deployment overhead to the model. In this paper, we introduce a novel attention-based routing mechanism A-MoD that leverages the existing attention map of the preceding layer for routing decisions within the current layer. Compared to standard routing, A-MoD allows for more efficient training as it introduces no additional trainable parameters and can be easily adapted from pretrained transformer models. Furthermore, it can increase the performance of the MoD model. For instance, we observe up to 2% higher accuracy on ImageNet compared to standard routing and isoFLOP ViT baselines. Furthermore, A-MoD improves the MoD training convergence, leading to up to 2x faster transfer learning. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20875v1-abstract-full').style.display = 'none'; document.getElementById('2412.20875v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 19 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22284">arXiv:2410.22284</a> <span> [<a href="https://arxiv.org/pdf/2410.22284">pdf</a>, <a href="https://arxiv.org/format/2410.22284">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Embedding-based classifiers can detect prompt injection attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ayub%2C+M+A">Md. Ahsan Ayub</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22284v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are seeing significant adoption in every type of organization due to their exceptional generative capabilities. 
However, LLMs are found to be vulnerable to various adversarial attacks, particularly prompt injection attacks, which trick them into producing harmful or inappropriate content. Adversaries execute such attacks by crafting malicious prompts to deceive the LLM… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22284v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22284v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22284v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are seeing significant adoption in every type of organization due to their exceptional generative capabilities. However, LLMs are found to be vulnerable to various adversarial attacks, particularly prompt injection attacks, which trick them into producing harmful or inappropriate content. Adversaries execute such attacks by crafting malicious prompts to deceive the LLMs. In this paper, we propose a novel approach based on embedding-based Machine Learning (ML) classifiers to protect LLM-based applications against this severe threat. We leverage three commonly used embedding models to generate embeddings of malicious and benign prompts and utilize ML classifiers to predict whether an input prompt is malicious. Out of several traditional ML methods, we achieve the best performance with classifiers built using Random Forest and XGBoost. Our classifiers outperform state-of-the-art prompt injection classifiers available in open-source implementations, which use encoder-only neural networks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22284v1-abstract-full').style.display = 'none'; document.getElementById('2410.22284v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16700">arXiv:2410.16700</a> <span> [<a href="https://arxiv.org/pdf/2410.16700">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> AskBeacon -- Performing genomic data exchange and analytics with natural language </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wickramarachchi%2C+A">Anuradha Wickramarachchi</a>, <a href="/search/cs?searchtype=author&query=Tonni%2C+S">Shakila Tonni</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sonali Majumdar</a>, <a href="/search/cs?searchtype=author&query=Karimi%2C+S">Sarvnaz Karimi</a>, <a href="/search/cs?searchtype=author&query=K%C3%B5ks%2C+S">Sulev K玫ks</a>, <a href="/search/cs?searchtype=author&query=Hosking%2C+B">Brendan Hosking</a>, <a href="/search/cs?searchtype=author&query=Rambla%2C+J">Jordi Rambla</a>, <a href="/search/cs?searchtype=author&query=Twine%2C+N+A">Natalie A. 
Twine</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+Y">Yatish Jain</a>, <a href="/search/cs?searchtype=author&query=Bauer%2C+D+C">Denis C. Bauer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16700v2-abstract-short" style="display: inline;"> Enabling clinicians and researchers to directly interact with global genomic data resources by removing technological barriers is vital for medical genomics. AskBeacon enables Large Language Models to be applied to securely shared cohorts via the GA4GH Beacon protocol. By simply "asking" Beacon, actionable insights can be gained, analyzed and made publication-ready. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16700v2-abstract-full" style="display: none;"> Enabling clinicians and researchers to directly interact with global genomic data resources by removing technological barriers is vital for medical genomics. AskBeacon enables Large Language Models to be applied to securely shared cohorts via the GA4GH Beacon protocol. By simply "asking" Beacon, actionable insights can be gained, analyzed and made publication-ready. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16700v2-abstract-full').style.display = 'none'; document.getElementById('2410.16700v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12914">arXiv:2409.12914</a> <span> [<a href="https://arxiv.org/pdf/2409.12914">pdf</a>, <a href="https://arxiv.org/format/2409.12914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Unsafe Feedback with Learning Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rosati%2C+D">Domenic Rosati</a>, <a href="/search/cs?searchtype=author&query=Edkins%2C+G">Giles Edkins</a>, <a href="/search/cs?searchtype=author&query=Raj%2C+H">Harsh Raj</a>, <a href="/search/cs?searchtype=author&query=Atanasov%2C+D">David Atanasov</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a>, <a href="/search/cs?searchtype=author&query=Rajendran%2C+J">Janarthanan Rajendran</a>, <a href="/search/cs?searchtype=author&query=Rudzicz%2C+F">Frank Rudzicz</a>, <a href="/search/cs?searchtype=author&query=Sajjad%2C+H">Hassan Sajjad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12914v2-abstract-short" style="display: inline;"> While there has been progress towards aligning Large Language Models (LLMs) with human values and ensuring safe behaviour at inference time, safety-guards can easily be removed when fine-tuned on unsafe and harmful datasets.While this setting has been treated extensively, another popular training paradigm, learning from unsafe feedback with reinforcement learning, has previously been unexplored. 
T… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12914v2-abstract-full').style.display = 'inline'; document.getElementById('2409.12914v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12914v2-abstract-full" style="display: none;"> While there has been progress towards aligning Large Language Models (LLMs) with human values and ensuring safe behaviour at inference time, safety-guards can easily be removed when fine-tuned on unsafe and harmful datasets.While this setting has been treated extensively, another popular training paradigm, learning from unsafe feedback with reinforcement learning, has previously been unexplored. This is concerning due to the widespread deployment of feedback collection systems. We address this gap by providing an analysis of learning settings where feedback is adversarial and noisy, i.e. that unsafe samples are preferred over safe ones despite model developers goal to maintain safety. We find that safety-aligned LLMs easily explore unsafe action spaces through generating harmful text and optimize for adversarial reward indicating that current safety guards are not enough to prevent learning from unsafe feedback. In order to protect against this vulnerability, we adapt a number of both "implict" and "explicit" harmful fine-tuning defences to evaluate whether they are effective as learning constraints in an RL setting finding that no method is generally effective pointing to the need for more research in defences given the widespread adoption of methods designed to learn from feedback. We end the paper with the observation that some defences work by performing "harmless reward hacking" for which we provide a theoretical explanation drawn from the theory of Constrained Markov Decision Processes and provide some direction for future defence development. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12914v2-abstract-full').style.display = 'none'; document.getElementById('2409.12914v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01438">arXiv:2409.01438</a> <span> [<a href="https://arxiv.org/pdf/2409.01438">pdf</a>, <a href="https://arxiv.org/format/2409.01438">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Resource-Efficient Adaptation of Speech Foundation Models for Multi-Speaker ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weiqing Wang</a>, <a href="/search/cs?searchtype=author&query=Dhawan%2C+K">Kunal Dhawan</a>, <a href="/search/cs?searchtype=author&query=Park%2C+T">Taejin Park</a>, <a href="/search/cs?searchtype=author&query=Puvvada%2C+K+C">Krishna C. 
Puvvada</a>, <a href="/search/cs?searchtype=author&query=Medennikov%2C+I">Ivan Medennikov</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">He Huang</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01438v2-abstract-short" style="display: inline;"> Speech foundation models have achieved state-of-the-art (SoTA) performance across various tasks, such as automatic speech recognition (ASR) in hundreds of languages. However, multi-speaker ASR remains a challenging task for these models due to data scarcity and sparsity. In this paper, we present approaches to enable speech foundation models to process and understand multi-speaker speech with limi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01438v2-abstract-full').style.display = 'inline'; document.getElementById('2409.01438v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01438v2-abstract-full" style="display: none;"> Speech foundation models have achieved state-of-the-art (SoTA) performance across various tasks, such as automatic speech recognition (ASR) in hundreds of languages. However, multi-speaker ASR remains a challenging task for these models due to data scarcity and sparsity. In this paper, we present approaches to enable speech foundation models to process and understand multi-speaker speech with limited training data. Specifically, we adapt a speech foundation model for the multi-speaker ASR task using only telephonic data. 
Remarkably, the adapted model also performs well on meeting data without any fine-tuning, demonstrating the generalization ability of our approach. We conduct several ablation studies to analyze the impact of different parameters and strategies on model performance. Our findings highlight the effectiveness of our methods. Results show that less parameters give better overall cpWER, which, although counter-intuitive, provides insights into adapting speech foundation models for multi-speaker ASR tasks with minimal annotated data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01438v2-abstract-full').style.display = 'none'; document.getElementById('2409.01438v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SLT 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21077">arXiv:2407.21077</a> <span> [<a href="https://arxiv.org/pdf/2407.21077">pdf</a>, <a href="https://arxiv.org/format/2407.21077">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Genetic Instruct: Scaling up Synthetic Generation of Coding Instructions for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Noroozi%2C+V">Vahid Noroozi</a>, <a href="/search/cs?searchtype=author&query=Narenthiran%2C+S">Sean Narenthiran</a>, <a href="/search/cs?searchtype=author&query=Ficek%2C+A">Aleksander Ficek</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21077v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) rely on instruction samples for alignment, but creating these datasets poses challenges, particularly in expert-dependent tasks like coding, which can be cost-prohibitive. 
One approach to mitigate these challenges is synthesizing data using another LLM. In this paper, we introduce a scalable method for generating synthetic instructions to enhance the code generation ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21077v1-abstract-full').style.display = 'inline'; document.getElementById('2407.21077v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21077v1-abstract-full" style="display: none;"> Large Language Models (LLMs) rely on instruction samples for alignment, but creating these datasets poses challenges, particularly in expert-dependent tasks like coding, which can be cost-prohibitive. One approach to mitigate these challenges is synthesizing data using another LLM. In this paper, we introduce a scalable method for generating synthetic instructions to enhance the code generation capability of LLMs. The proposed algorithm, Genetic-Instruct, mimics evolutionary processes, utilizing self-instruction to create numerous synthetic samples from a limited number of seeds. Genetic-Instruct is designed for efficient scaling of the generation process. Fine-tuning multiple coding LLMs with the synthetic samples demonstrates a significant improvement in their code generation accuracy compared to the baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21077v1-abstract-full').style.display = 'none'; document.getElementById('2407.21077v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12184">arXiv:2407.12184</a> <span> [<a href="https://arxiv.org/pdf/2407.12184">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> The object detection method aids in image reconstruction evaluation and clinical interpretation of meniscal abnormalities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Konovalova%2C+N">Natalia Konovalova</a>, <a href="/search/cs?searchtype=author&query=Tolpadi%2C+A">Aniket Tolpadi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Felix Liu</a>, <a href="/search/cs?searchtype=author&query=Akkaya%2C+Z">Zehra Akkaya</a>, <a href="/search/cs?searchtype=author&query=Gassert%2C+F">Felix Gassert</a>, <a href="/search/cs?searchtype=author&query=Giesler%2C+P">Paula Giesler</a>, <a href="/search/cs?searchtype=author&query=Luitjens%2C+J">Johanna Luitjens</a>, <a href="/search/cs?searchtype=author&query=Han%2C+M">Misung Han</a>, <a href="/search/cs?searchtype=author&query=Bahroos%2C+E">Emma Bahroos</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sharmila Majumdar</a>, <a href="/search/cs?searchtype=author&query=Pedoia%2C+V">Valentina Pedoia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12184v1-abstract-short" style="display: inline;"> This study investigates the relationship between deep learning (DL) image reconstruction quality and anomaly detection performance, and evaluates the efficacy of an artificial 
intelligence (AI) assistant in enhancing radiologists' interpretation of meniscal anomalies on reconstructed images. A retrospective study was conducted using an in-house reconstruction and anomaly detection pipeline to asse… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12184v1-abstract-full').style.display = 'inline'; document.getElementById('2407.12184v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12184v1-abstract-full" style="display: none;"> This study investigates the relationship between deep learning (DL) image reconstruction quality and anomaly detection performance, and evaluates the efficacy of an artificial intelligence (AI) assistant in enhancing radiologists' interpretation of meniscal anomalies on reconstructed images. A retrospective study was conducted using an in-house reconstruction and anomaly detection pipeline to assess knee MR images from 896 patients. The original and 14 sets of DL-reconstructed images were evaluated using standard reconstruction and object detection metrics, alongside newly developed box-based reconstruction metrics. Two clinical radiologists reviewed a subset of 50 patients' images, both original and AI-assisted reconstructed, with subsequent assessment of their accuracy and performance characteristics. Results indicated that the structural similarity index (SSIM) showed a weaker correlation with anomaly detection metrics (mAP, r=0.64, p=0.01; F1 score, r=0.38, p=0.18), while box-based SSIM had a stronger association with detection performance (mAP, r=0.81, p<0.01; F1 score, r=0.65, p=0.01). Minor SSIM fluctuations did not affect detection outcomes, but significant changes reduced performance. Radiologists' AI-assisted evaluations demonstrated improved accuracy (86.0% without assistance vs. 88.3% with assistance, p<0.05) and interrater agreement (Cohen's kappa, 0.39 without assistance vs. 
0.57 with assistance). An additional review led to the incorporation of 17 more lesions into the dataset. The proposed anomaly detection method shows promise in evaluating reconstruction algorithms for automated tasks and aiding radiologists in interpreting DL-reconstructed MR images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12184v1-abstract-full').style.display = 'none'; document.getElementById('2407.12184v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19674">arXiv:2406.19674</a> <span> [<a href="https://arxiv.org/pdf/2406.19674">pdf</a>, <a href="https://arxiv.org/format/2406.19674">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Less is More: Accurate Speech Recognition & Translation without Web-Scale Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Puvvada%2C+K+C">Krishna C. 
Puvvada</a>, <a href="/search/cs?searchtype=author&query=%C5%BBelasko%2C+P">Piotr Żelasko</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">He Huang</a>, <a href="/search/cs?searchtype=author&query=Hrinchuk%2C+O">Oleksii Hrinchuk</a>, <a href="/search/cs?searchtype=author&query=Koluguri%2C+N+R">Nithin Rao Koluguri</a>, <a href="/search/cs?searchtype=author&query=Dhawan%2C+K">Kunal Dhawan</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Rastorgueva%2C+E">Elena Rastorgueva</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhehuai Chen</a>, <a href="/search/cs?searchtype=author&query=Lavrukhin%2C+V">Vitaly Lavrukhin</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19674v1-abstract-short" style="display: inline;"> Recent advances in speech recognition and translation rely on hundreds of thousands of hours of Internet speech data. We argue that state-of-the art accuracy can be reached without relying on web-scale data. Canary - multilingual ASR and speech translation model, outperforms current state-of-the-art models - Whisper, OWSM, and Seamless-M4T on English, French, Spanish, and German languages, while b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19674v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19674v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19674v1-abstract-full" style="display: none;"> Recent advances in speech recognition and translation rely on hundreds of thousands of hours of Internet speech data. 
We argue that state-of-the art accuracy can be reached without relying on web-scale data. Canary - multilingual ASR and speech translation model, outperforms current state-of-the-art models - Whisper, OWSM, and Seamless-M4T on English, French, Spanish, and German languages, while being trained on an order of magnitude less data than these models. Three key factors enables such data-efficient model: (1) a FastConformer-based attention encoder-decoder architecture (2) training on synthetic data generated with machine translation and (3) advanced training techniques: data-balancing, dynamic data blending, dynamic bucketing and noise-robust fine-tuning. The model, weights, and training code will be open-sourced. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19674v1-abstract-full').style.display = 'none'; document.getElementById('2406.19674v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at Interspeech-2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12946">arXiv:2406.12946</a> <span> [<a href="https://arxiv.org/pdf/2406.12946">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Instruction Data Generation and Unsupervised Adaptation for Speech Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Noroozi%2C+V">Vahid Noroozi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhehuai Chen</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Steve Huang</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12946v1-abstract-short" style="display: inline;"> In this paper, we propose three methods for generating synthetic samples to train and evaluate multimodal large language models capable of processing both text and speech inputs. 
Addressing the scarcity of samples containing both modalities, synthetic data generation emerges as a crucial strategy to enhance the performance of such systems and facilitate the modeling of cross-modal relationships be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12946v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12946v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12946v1-abstract-full" style="display: none;"> In this paper, we propose three methods for generating synthetic samples to train and evaluate multimodal large language models capable of processing both text and speech inputs. Addressing the scarcity of samples containing both modalities, synthetic data generation emerges as a crucial strategy to enhance the performance of such systems and facilitate the modeling of cross-modal relationships between the speech and text domains. Our process employs large language models to generate textual components and text-to-speech systems to generate speech components. The proposed methods offer a practical and effective means to expand the training dataset for these models. Experimental results show progress in achieving an integrated understanding of text and speech. We also highlight the potential of using unlabeled speech data to generate synthetic samples comparable in quality to those with available transcriptions, enabling the expansion of these models to more languages. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12946v1-abstract-full').style.display = 'none'; document.getElementById('2406.12946v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11871">arXiv:2406.11871</a> <span> [<a href="https://arxiv.org/pdf/2406.11871">pdf</a>, <a href="https://arxiv.org/format/2406.11871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generative AI Voting: Fair Collective Choice is Resilient to LLM Biases and Inconsistencies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Elkind%2C+E">Edith Elkind</a>, <a href="/search/cs?searchtype=author&query=Pournaras%2C+E">Evangelos Pournaras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11871v3-abstract-short" style="display: inline;"> Scaling up deliberative and voting participation is a longstanding endeavor -- a cornerstone for direct democracy and legitimate collective choice. 
Recent breakthroughs in generative artificial intelligence (AI) and large language models (LLMs) unravel new capabilities for AI personal assistants to overcome cognitive bandwidth limitations of humans, providing decision support or even direct repres… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11871v3-abstract-full').style.display = 'inline'; document.getElementById('2406.11871v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11871v3-abstract-full" style="display: none;"> Scaling up deliberative and voting participation is a longstanding endeavor -- a cornerstone for direct democracy and legitimate collective choice. Recent breakthroughs in generative artificial intelligence (AI) and large language models (LLMs) unravel new capabilities for AI personal assistants to overcome cognitive bandwidth limitations of humans, providing decision support or even direct representation of human voters at large scale. However, the quality of this representation and what underlying biases manifest when delegating collective decision-making to LLMs is an alarming and timely challenge to tackle. By rigorously emulating with high realism more than >50K LLM voting personas in 81 real-world voting elections, we disentangle the nature of different biases in LLMS (GPT 3, GPT 3.5, and Llama2). Complex preferential ballot formats exhibit significant inconsistencies compared to simpler majoritarian elections that show higher consistency. Strikingly though, by demonstrating for the first time in real-world a proportional representation of voters in direct democracy, we are also able to show that fair ballot aggregation methods, such as equal shares, prove to be a win-win: fairer voting outcomes for humans with fairer AI representation. 
This novel underlying relationship proves paramount for democratic resilience in progressives scenarios with low voters turnout and voter fatigue supported by AI representatives: abstained voters are mitigated by recovering highly representative voting outcomes that are fairer. These interdisciplinary insights provide remarkable foundations for science, policymakers, and citizens to develop safeguards and resilience for AI risks in democratic innovations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11871v3-abstract-full').style.display = 'none'; document.getElementById('2406.11871v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11704">arXiv:2406.11704</a> <span> [<a href="https://arxiv.org/pdf/2406.11704">pdf</a>, <a href="https://arxiv.org/format/2406.11704">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Nemotron-4 340B Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nvidia"> Nvidia</a>, <a href="/search/cs?searchtype=author&query=%3A"> :</a>, <a href="/search/cs?searchtype=author&query=Adler%2C+B">Bo Adler</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+N">Niket Agarwal</a>, <a href="/search/cs?searchtype=author&query=Aithal%2C+A">Ashwath Aithal</a>, <a href="/search/cs?searchtype=author&query=Anh%2C+D+H">Dong H. 
Anh</a>, <a href="/search/cs?searchtype=author&query=Bhattacharya%2C+P">Pallab Bhattacharya</a>, <a href="/search/cs?searchtype=author&query=Brundyn%2C+A">Annika Brundyn</a>, <a href="/search/cs?searchtype=author&query=Casper%2C+J">Jared Casper</a>, <a href="/search/cs?searchtype=author&query=Catanzaro%2C+B">Bryan Catanzaro</a>, <a href="/search/cs?searchtype=author&query=Clay%2C+S">Sharon Clay</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+J">Jonathan Cohen</a>, <a href="/search/cs?searchtype=author&query=Das%2C+S">Sirshak Das</a>, <a href="/search/cs?searchtype=author&query=Dattagupta%2C+A">Ayush Dattagupta</a>, <a href="/search/cs?searchtype=author&query=Delalleau%2C+O">Olivier Delalleau</a>, <a href="/search/cs?searchtype=author&query=Derczynski%2C+L">Leon Derczynski</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yi Dong</a>, <a href="/search/cs?searchtype=author&query=Egert%2C+D">Daniel Egert</a>, <a href="/search/cs?searchtype=author&query=Evans%2C+E">Ellie Evans</a>, <a href="/search/cs?searchtype=author&query=Ficek%2C+A">Aleksander Ficek</a>, <a href="/search/cs?searchtype=author&query=Fridman%2C+D">Denys Fridman</a>, <a href="/search/cs?searchtype=author&query=Ghosh%2C+S">Shaona Ghosh</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a>, <a href="/search/cs?searchtype=author&query=Gitman%2C+I">Igor Gitman</a>, <a href="/search/cs?searchtype=author&query=Grzegorzek%2C+T">Tomasz Grzegorzek</a> , et al. (58 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11704v2-abstract-short" style="display: inline;"> We release the Nemotron-4 340B model family, including Nemotron-4-340B-Base, Nemotron-4-340B-Instruct, and Nemotron-4-340B-Reward. 
Our models are open access under the NVIDIA Open Model License Agreement, a permissive model license that allows distribution, modification, and use of the models and its outputs. These models perform competitively to open access models on a wide range of evaluation be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11704v2-abstract-full').style.display = 'inline'; document.getElementById('2406.11704v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11704v2-abstract-full" style="display: none;"> We release the Nemotron-4 340B model family, including Nemotron-4-340B-Base, Nemotron-4-340B-Instruct, and Nemotron-4-340B-Reward. Our models are open access under the NVIDIA Open Model License Agreement, a permissive model license that allows distribution, modification, and use of the models and its outputs. These models perform competitively to open access models on a wide range of evaluation benchmarks, and were sized to fit on a single DGX H100 with 8 GPUs when deployed in FP8 precision. We believe that the community can benefit from these models in various research studies and commercial applications, especially for generating synthetic data to train smaller language models. Notably, over 98% of data used in our model alignment process is synthetically generated, showcasing the effectiveness of these models in generating synthetic data. To further support open research and facilitate model development, we are also open-sourcing the synthetic data generation pipeline used in our model alignment process. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11704v2-abstract-full').style.display = 'none'; document.getElementById('2406.11704v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11036">arXiv:2406.11036</a> <span> [<a href="https://arxiv.org/pdf/2406.11036">pdf</a>, <a href="https://arxiv.org/format/2406.11036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> garak: A Framework for Security Probing Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Derczynski%2C+L">Leon Derczynski</a>, <a href="/search/cs?searchtype=author&query=Galinkin%2C+E">Erick Galinkin</a>, <a href="/search/cs?searchtype=author&query=Martin%2C+J">Jeffrey Martin</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subho Majumdar</a>, <a href="/search/cs?searchtype=author&query=Inie%2C+N">Nanna Inie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11036v1-abstract-short" style="display: inline;"> As Large Language Models (LLMs) are deployed and integrated into thousands of applications, the need for scalable 
evaluation of how models respond to adversarial attacks grows rapidly. However, LLM security is a moving target: models produce unpredictable output, are constantly updated, and the potential adversary is highly diverse: anyone with access to the internet and a decent command of natura… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11036v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11036v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11036v1-abstract-full" style="display: none;"> As Large Language Models (LLMs) are deployed and integrated into thousands of applications, the need for scalable evaluation of how models respond to adversarial attacks grows rapidly. However, LLM security is a moving target: models produce unpredictable output, are constantly updated, and the potential adversary is highly diverse: anyone with access to the internet and a decent command of natural language. Further, what constitutes a security weak in one context may not be an issue in a different context; one-fits-all guardrails remain theoretical. In this paper, we argue that it is time to rethink what constitutes ``LLM security'', and pursue a holistic approach to LLM security evaluation, where exploration and discovery of issues are central. To this end, this paper introduces garak (Generative AI Red-teaming and Assessment Kit), a framework which can be used to discover and identify vulnerabilities in a target LLM or dialog system. garak probes an LLM in a structured fashion to discover potential vulnerabilities. The outputs of the framework describe a target model's weaknesses, contribute to an informed discussion of what composes vulnerabilities in unique contexts, and can inform alignment and policy discussions for LLM deployment. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11036v1-abstract-full').style.display = 'none'; document.getElementById('2406.11036v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://garak.ai</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14577">arXiv:2405.14577</a> <span> [<a href="https://arxiv.org/pdf/2405.14577">pdf</a>, <a href="https://arxiv.org/format/2405.14577">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Representation Noising: A Defence Mechanism Against Harmful Finetuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rosati%2C+D">Domenic Rosati</a>, <a href="/search/cs?searchtype=author&query=Wehner%2C+J">Jan Wehner</a>, <a href="/search/cs?searchtype=author&query=Williams%2C+K">Kai Williams</a>, <a href="/search/cs?searchtype=author&query=Bartoszcze%2C+%C5%81">Łukasz Bartoszcze</a>, <a href="/search/cs?searchtype=author&query=Atanasov%2C+D">David Atanasov</a>, <a href="/search/cs?searchtype=author&query=Gonzales%2C+R">Robie Gonzales</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a>, <a href="/search/cs?searchtype=author&query=Maple%2C+C">Carsten Maple</a>, <a 
href="/search/cs?searchtype=author&query=Sajjad%2C+H">Hassan Sajjad</a>, <a href="/search/cs?searchtype=author&query=Rudzicz%2C+F">Frank Rudzicz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14577v4-abstract-short" style="display: inline;"> Releasing open-source large language models (LLMs) presents a dual-use risk since bad actors can easily fine-tune these models for harmful purposes. Even without the open release of weights, weight stealing and fine-tuning APIs make closed models vulnerable to harmful fine-tuning attacks (HFAs). While safety measures like preventing jailbreaks and improving safety guardrails are important, such me… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14577v4-abstract-full').style.display = 'inline'; document.getElementById('2405.14577v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14577v4-abstract-full" style="display: none;"> Releasing open-source large language models (LLMs) presents a dual-use risk since bad actors can easily fine-tune these models for harmful purposes. Even without the open release of weights, weight stealing and fine-tuning APIs make closed models vulnerable to harmful fine-tuning attacks (HFAs). While safety measures like preventing jailbreaks and improving safety guardrails are important, such measures can easily be reversed through fine-tuning. In this work, we propose Representation Noising (RepNoise), a defence mechanism that operates even when attackers have access to the weights. RepNoise works by removing information about harmful representations such that it is difficult to recover them during fine-tuning. 
Importantly, our defence is also able to generalize across different subsets of harm that have not been seen during the defence process as long as they are drawn from the same distribution of the attack set. Our method does not degrade the general capability of LLMs and retains the ability to train the model on harmless tasks. We provide empirical evidence that the efficacy of our defence lies in its ``depth'': the degree to which information about harmful representations is removed across all layers of the LLM. We also find areas where RepNoise still remains ineffective and highlight how those limitations can inform future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14577v4-abstract-full').style.display = 'none'; document.getElementById('2405.14577v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.05495">arXiv:2405.05495</a> <span> [<a href="https://arxiv.org/pdf/2405.05495">pdf</a>, <a href="https://arxiv.org/format/2405.05495">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Other Computer Science">cs.OH</span> </div> </div> <p class="title is-5 mathjax"> PARSAC: Fast, Human-quality Floorplanning for Modern SoCs with Complex Design Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mostafa%2C+H">Hesham Mostafa</a>, <a href="/search/cs?searchtype=author&query=Mallappa%2C+U">Uday Mallappa</a>, <a href="/search/cs?searchtype=author&query=Galkin%2C+M">Mikhail Galkin</a>, <a href="/search/cs?searchtype=author&query=Phielipp%2C+M">Mariano Phielipp</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somdeb Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.05495v3-abstract-short" style="display: inline;"> The floorplanning of Systems-on-a-Chip (SoCs) and of chip sub-systems is a crucial step in the physical design flow as it determines the optimal shapes and locations of the blocks that make up the system. Simulated Annealing (SA) has been the method of choice for tackling classical floorplanning problems where the objective is to minimize wire-length and the total placement area. 
The goal in indus… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05495v3-abstract-full').style.display = 'inline'; document.getElementById('2405.05495v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.05495v3-abstract-full" style="display: none;"> The floorplanning of Systems-on-a-Chip (SoCs) and of chip sub-systems is a crucial step in the physical design flow as it determines the optimal shapes and locations of the blocks that make up the system. Simulated Annealing (SA) has been the method of choice for tackling classical floorplanning problems where the objective is to minimize wire-length and the total placement area. The goal in industry-relevant floorplanning problems, however, is not only to minimize area and wire-length, but to do that while respecting hard placement constraints that specify the general area and/or the specific locations for the placement of some blocks. We show that simply incorporating these constraints into the SA objective function leads to sub-optimal, and often illegal, solutions. We propose the Constraints-Aware Simulated Annealing (CA-SA) method and show that it strongly outperforms vanilla SA in floorplanning problems with hard placement constraints. We developed a new floorplanning tool on top of CA-SA: PARSAC (Parallel Simulated Annealing with Constraints). PARSAC is an efficient, easy-to-use, and massively parallel floorplanner. Unlike current SA-based or learning-based floorplanning tools that cannot effectively incorporate hard placement-constraints, PARSAC can quickly construct the Pareto-optimal legal solutions front for constrained floorplanning problems. PARSAC also outperforms traditional SA on legacy floorplanning benchmarks. PARSAC is available as an open-source repository for researchers to replicate and build on our result. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05495v3-abstract-full').style.display = 'none'; document.getElementById('2405.05495v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.05480">arXiv:2405.05480</a> <span> [<a href="https://arxiv.org/pdf/2405.05480">pdf</a>, <a href="https://arxiv.org/format/2405.05480">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FloorSet -- a VLSI Floorplanning Dataset with Design Constraints of Real-World SoCs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mallappa%2C+U">Uday Mallappa</a>, <a href="/search/cs?searchtype=author&query=Mostafa%2C+H">Hesham Mostafa</a>, <a href="/search/cs?searchtype=author&query=Galkin%2C+M">Mikhail Galkin</a>, <a href="/search/cs?searchtype=author&query=Phielipp%2C+M">Mariano Phielipp</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somdeb Majumdar</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.05480v4-abstract-short" style="display: inline;"> Floorplanning for systems-on-a-chip (SoCs) and its sub-systems is a crucial and non-trivial step of the physical design flow. It represents a difficult combinatorial optimization problem. A typical large scale SoC with 120 partitions generates a search-space of nearly 10E250. As novel machine learning (ML) approaches emerge to tackle such problems, there is a growing need for a modern benchmark th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05480v4-abstract-full').style.display = 'inline'; document.getElementById('2405.05480v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.05480v4-abstract-full" style="display: none;"> Floorplanning for systems-on-a-chip (SoCs) and its sub-systems is a crucial and non-trivial step of the physical design flow. It represents a difficult combinatorial optimization problem. A typical large scale SoC with 120 partitions generates a search-space of nearly 10E250. As novel machine learning (ML) approaches emerge to tackle such problems, there is a growing need for a modern benchmark that comprises a large training dataset and performance metrics that better reflect real-world constraints and objectives compared to existing benchmarks. To address this need, we present FloorSet -- two comprehensive datasets of synthetic fixed-outline floorplan layouts that reflect the distribution of real SoCs. Each dataset has 1M training samples and 100 test samples where each sample is a synthetic floor-plan. FloorSet-Prime comprises fully-abutted rectilinear partitions and near-optimal wire-length. 
A simplified dataset that reflects early design phases, FloorSet-Lite comprises rectangular partitions, with under 5 percent white-space and near-optimal wire-length. Both datasets define hard constraints seen in modern design flows such as shape constraints, edge-affinity, grouping constraints, and pre-placement constraints. FloorSet is intended to spur fundamental research on large-scale constrained optimization problems. Crucially, FloorSet alleviates the core issue of reproducibility in modern ML driven solutions to such problems. FloorSet is available as an open-source repository for the research community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05480v4-abstract-full').style.display = 'none'; document.getElementById('2405.05480v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.05085">arXiv:2405.05085</a> <span> [<a href="https://arxiv.org/pdf/2405.05085">pdf</a>, <a href="https://arxiv.org/format/2405.05085">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Fair Voting Outcomes with Impact and Novelty Compromises? 
Unraveling Biases in Electing Participatory Budgeting Winners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Maharjan%2C+S">Sajan Maharjan</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Pournaras%2C+E">Evangelos Pournaras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.05085v3-abstract-short" style="display: inline;"> Participatory budgeting, as a paradigm for democratic innovations, engages citizens in the distribution of a public budget to projects, which they propose and vote for implementation. So far, voting algorithms have been proposed and studied in social choice literature to elect projects that are popular, while others prioritize on a proportional representation of voters' preferences, for instance,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05085v3-abstract-full').style.display = 'inline'; document.getElementById('2405.05085v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.05085v3-abstract-full" style="display: none;"> Participatory budgeting, as a paradigm for democratic innovations, engages citizens in the distribution of a public budget to projects, which they propose and vote for implementation. So far, voting algorithms have been proposed and studied in social choice literature to elect projects that are popular, while others prioritize on a proportional representation of voters' preferences, for instance, the rule of equal shares. 
However, the anticipated impact and novelty in the broader society by the winning projects, as selected by different algorithms, remains totally under-explored, lacking both a universal theory of impact for voting and a rigorous unifying framework for impact and novelty assessments. This paper tackles this grand challenge towards new axiomatic foundations for designing effective and fair voting methods. This is via new and striking insights derived from a large-scale analysis of biases over 345 real-world voting outcomes, characterized for the first time by a novel portfolio of impact and novelty metrics. We find strong causal evidence that equal shares comes with impact loss in several infrastructural projects of different cost levels that have been so far over-represented. However, it also comes with a novel, yet over-represented, impact gain in welfare, education and culture. We discuss broader implications of these results and how impact loss can be mitigated at the stage of campaign design and project ideation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05085v3-abstract-full').style.display = 'none'; document.getElementById('2405.05085v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">41 pages, 19 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05482">arXiv:2404.05482</a> <span> [<a href="https://arxiv.org/pdf/2404.05482">pdf</a>, <a href="https://arxiv.org/format/2404.05482">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> WaveCatBoost for Probabilistic Forecasting of Regional Air Quality Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Borah%2C+J">Jintu Borah</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+T">Tanujit Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Nadzir%2C+M+S+M">Md. Shahrul Md. Nadzir</a>, <a href="/search/cs?searchtype=author&query=Cayetano%2C+M+G">Mylene G. Cayetano</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Shubhankar Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05482v1-abstract-short" style="display: inline;"> Accurate and reliable air quality forecasting is essential for protecting public health, sustainable development, pollution control, and enhanced urban planning. This letter presents a novel WaveCatBoost architecture designed to forecast the real-time concentrations of air pollutants by combining the maximal overlapping discrete wavelet transform (MODWT) with the CatBoost model. 
This hybrid approa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05482v1-abstract-full').style.display = 'inline'; document.getElementById('2404.05482v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05482v1-abstract-full" style="display: none;"> Accurate and reliable air quality forecasting is essential for protecting public health, sustainable development, pollution control, and enhanced urban planning. This letter presents a novel WaveCatBoost architecture designed to forecast the real-time concentrations of air pollutants by combining the maximal overlapping discrete wavelet transform (MODWT) with the CatBoost model. This hybrid approach efficiently transforms time series into high-frequency and low-frequency components, thereby extracting signal from noise and improving prediction accuracy and robustness. Evaluation of two distinct regional datasets, from the Central Air Pollution Control Board (CPCB) sensor network and a low-cost air quality sensor system (LAQS), underscores the superior performance of our proposed methodology in real-time forecasting compared to the state-of-the-art statistical and deep learning architectures. Moreover, we employ a conformal prediction strategy to provide probabilistic bands with our forecasts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05482v1-abstract-full').style.display = 'none'; document.getElementById('2404.05482v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05947">arXiv:2401.05947</a> <span> [<a href="https://arxiv.org/pdf/2401.05947">pdf</a>, <a href="https://arxiv.org/format/2401.05947">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Send Message to the Future? Blockchain-based Time Machines for Decentralized Reveal of Locked Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuolun Li</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Pournaras%2C+E">Evangelos Pournaras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05947v4-abstract-short" style="display: inline;"> Conditional information reveal systems automate the release of information upon meeting specific predefined conditions, such as time or location. This paper introduces a breakthrough in the understanding, design, and application of conditional information reveal systems that are highly secure and decentralized. 
By designing a new practical timed-release cryptography system and a secret sharing sch… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05947v4-abstract-full').style.display = 'inline'; document.getElementById('2401.05947v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05947v4-abstract-full" style="display: none;"> Conditional information reveal systems automate the release of information upon meeting specific predefined conditions, such as time or location. This paper introduces a breakthrough in the understanding, design, and application of conditional information reveal systems that are highly secure and decentralized. By designing a new practical timed-release cryptography system and a secret sharing scheme with reveal-verifiability, a novel data sharing system is devised on the blockchain that "sends messages in the future" with highly accurate decryption times. Notably, the proposed secret sharing scheme applies to other applications requiring verifiability of revealed secret shares. This paper provides a complete evaluation portfolio of this pioneering paradigm, including analytical results, a validation of its robustness in the Tamarin Prover and a performance evaluation of a real-world, open-source system prototype deployed across the globe. Using real-world election data, we also demonstrate the applicability of this innovative system in e-voting, illustrating its capacity to secure and ensure fair electronic voting processes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05947v4-abstract-full').style.display = 'none'; document.getElementById('2401.05947v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.17279">arXiv:2312.17279</a> <span> [<a href="https://arxiv.org/pdf/2312.17279">pdf</a>, <a href="https://arxiv.org/format/2312.17279">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Stateful Conformer with Cache-based Inference for Streaming Automatic Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Noroozi%2C+V">Vahid Noroozi</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Ankur Kumar</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.17279v3-abstract-short" style="display: inline;"> In this paper, we propose an efficient and accurate streaming speech 
recognition model based on the FastConformer architecture. We adapted the FastConformer architecture for streaming applications through: (1) constraining both the look-ahead and past contexts in the encoder, and (2) introducing an activation caching mechanism to enable the non-autoregressive encoder to operate autoregressively du… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17279v3-abstract-full').style.display = 'inline'; document.getElementById('2312.17279v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.17279v3-abstract-full" style="display: none;"> In this paper, we propose an efficient and accurate streaming speech recognition model based on the FastConformer architecture. We adapted the FastConformer architecture for streaming applications through: (1) constraining both the look-ahead and past contexts in the encoder, and (2) introducing an activation caching mechanism to enable the non-autoregressive encoder to operate autoregressively during inference. The proposed model is thoughtfully designed in a way to eliminate the accuracy disparity between the train and inference time which is common for many streaming models. Furthermore, our proposed encoder works with various decoder configurations including Connectionist Temporal Classification (CTC) and RNN-Transducer (RNNT) decoders. Additionally, we introduced a hybrid CTC/RNNT architecture which utilizes a shared encoder with both a CTC and RNNT decoder to boost the accuracy and save computation. We evaluate the proposed model on LibriSpeech dataset and a multi-domain large scale dataset and demonstrate that it can achieve better accuracy with lower latency and inference time compared to a conventional buffered streaming model baseline. 
We also showed that training a model with multiple latencies can achieve better accuracy than single latency models while it enables us to support multiple latencies with a single model. Our experiments also showed the hybrid architecture would not only speedup the convergence of the CTC decoder but also improves the accuracy of streaming models compared to single decoder models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17279v3-abstract-full').style.display = 'none'; document.getElementById('2312.17279v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Shorter version accepted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.03374">arXiv:2311.03374</a> <span> [<a href="https://arxiv.org/pdf/2311.03374">pdf</a>, <a href="https://arxiv.org/format/2311.03374">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Generative AI for Software Metadata: Overview of the Information Retrieval in Software Engineering Track at FIRE 2023 </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Paul%2C+S">Soumen Paul</a>, <a href="/search/cs?searchtype=author&query=Paul%2C+D">Debjyoti Paul</a>, <a href="/search/cs?searchtype=author&query=Bandyopadhyay%2C+A">Ayan Bandyopadhyay</a>, <a href="/search/cs?searchtype=author&query=Chattopadhyay%2C+S">Samiran Chattopadhyay</a>, <a href="/search/cs?searchtype=author&query=Das%2C+P+P">Partha Pratim Das</a>, <a href="/search/cs?searchtype=author&query=Clough%2C+P+D">Paul D Clough</a>, <a href="/search/cs?searchtype=author&query=Majumder%2C+P">Prasenjit Majumder</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.03374v1-abstract-short" style="display: inline;"> The Information Retrieval in Software Engineering (IRSE) track aims to develop solutions for automated evaluation of code comments in a machine learning framework based on human and large language model generated labels. In this track, there is a binary classification task to classify comments as useful and not useful. The dataset consists of 9048 code comments and surrounding code snippet pairs e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03374v1-abstract-full').style.display = 'inline'; document.getElementById('2311.03374v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.03374v1-abstract-full" style="display: none;"> The Information Retrieval in Software Engineering (IRSE) track aims to develop solutions for automated evaluation of code comments in a machine learning framework based on human and large language model generated labels. In this track, there is a binary classification task to classify comments as useful and not useful. 
The dataset consists of 9048 code comments and surrounding code snippet pairs extracted from open source github C based projects and an additional dataset generated individually by teams using large language models. Overall 56 experiments have been submitted by 17 teams from various universities and software companies. The submissions have been evaluated quantitatively using the F1-Score and qualitatively based on the type of features developed, the supervised learning model used and their corresponding hyper-parameters. The labels generated from large language models increase the bias in the prediction model but lead to less over-fitted results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03374v1-abstract-full').style.display = 'none'; document.getElementById('2311.03374v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Overview Paper of the Information Retrieval of Software Engineering Track at the Forum for Information Retrieval, 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.17152">arXiv:2310.17152</a> <span> [<a href="https://arxiv.org/pdf/2310.17152">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Technical Note: Feasibility of translating 3.0T-trained Deep-Learning Segmentation Models Out-of-the-Box on Low-Field MRI 0.55T Knee-MRI of Healthy Controls </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhattacharjee%2C+R">Rupsa Bhattacharjee</a>, <a href="/search/cs?searchtype=author&query=Akkaya%2C+Z">Zehra Akkaya</a>, <a href="/search/cs?searchtype=author&query=Luitjens%2C+J">Johanna Luitjens</a>, <a href="/search/cs?searchtype=author&query=Su%2C+P">Pan Su</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Pedoia%2C+V">Valentina Pedoia</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sharmila Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2310.17152v1-abstract-short" style="display: inline;"> In the current study, our purpose is to evaluate the feasibility of applying deep learning (DL) enabled algorithms to quantify bilateral knee biomarkers in healthy controls scanned at 0.55T and compared with 3.0T. The current study assesses the performance of standard in-practice bone, and cartilage segmentation algorithms at 0.55T, both qualitatively and quantitatively, in terms of comparing segm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17152v1-abstract-full').style.display = 'inline'; document.getElementById('2310.17152v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.17152v1-abstract-full" style="display: none;"> In the current study, our purpose is to evaluate the feasibility of applying deep learning (DL) enabled algorithms to quantify bilateral knee biomarkers in healthy controls scanned at 0.55T and compared with 3.0T. The current study assesses the performance of standard in-practice bone, and cartilage segmentation algorithms at 0.55T, both qualitatively and quantitatively, in terms of comparing segmentation performance, areas of improvement, and compartment-wise cartilage thickness values between 0.55T vs. 3.0T. Initial results demonstrate a usable to good technical feasibility of translating existing quantitative deep-learning-based image segmentation techniques, trained on 3.0T, out of 0.55T for knee MRI, in a multi-vendor acquisition environment. Especially in terms of segmenting cartilage compartments, the models perform almost equivalent to 3.0T in terms of Likert ranking. The 0.55T low-field sustainable and easy-to-install MRI, as demonstrated, thus, can be utilized for evaluating knee cartilage thickness and bone segmentations aided by established DL algorithms trained at higher-field strengths out-of-the-box initially. 
This could be utilized at the far-spread point-of-care locations with a lack of radiologists available to manually segment low-field images, at least till a decent base of low-field data pool is collated. With further fine-tuning with manual labeling of low-field data or utilizing synthesized higher SNR images from low-field images, OA biomarker quantification performance is potentially guaranteed to be further improved. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17152v1-abstract-full').style.display = 'none'; document.getElementById('2310.17152v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 Pages, 3 Figures, 2 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.09950">arXiv:2309.09950</a> <span> [<a href="https://arxiv.org/pdf/2309.09950">pdf</a>, <a href="https://arxiv.org/format/2309.09950">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Investigating End-to-End ASR Architectures for Long Form Audio Transcription </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Koluguri%2C+N+R">Nithin Rao Koluguri</a>, <a href="/search/cs?searchtype=author&query=Kriman%2C+S">Samuel Kriman</a>, <a 
href="/search/cs?searchtype=author&query=Zelenfroind%2C+G">Georgy Zelenfroind</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Rekesh%2C+D">Dima Rekesh</a>, <a href="/search/cs?searchtype=author&query=Noroozi%2C+V">Vahid Noroozi</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.09950v2-abstract-short" style="display: inline;"> This paper presents an overview and evaluation of some of the end-to-end ASR models on long-form audios. We study three categories of Automatic Speech Recognition(ASR) models based on their core architecture: (1) convolutional, (2) convolutional with squeeze-and-excitation and (3) convolutional models with attention. We selected one ASR model from each category and evaluated Word Error Rate, maxim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09950v2-abstract-full').style.display = 'inline'; document.getElementById('2309.09950v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.09950v2-abstract-full" style="display: none;"> This paper presents an overview and evaluation of some of the end-to-end ASR models on long-form audios. We study three categories of Automatic Speech Recognition(ASR) models based on their core architecture: (1) convolutional, (2) convolutional with squeeze-and-excitation and (3) convolutional models with attention. We selected one ASR model from each category and evaluated Word Error Rate, maximum audio length and real-time factor for each model on a variety of long audio benchmarks: Earnings-21 and 22, CORAAL, and TED-LIUM3. 
The model from the category of self-attention with local attention and global token has the best accuracy comparing to other architectures. We also compared models with CTC and RNNT decoders and showed that CTC-based models are more robust and efficient than RNNT on long form audio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09950v2-abstract-full').style.display = 'none'; document.getElementById('2309.09950v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">PrePrint. 
Submitted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.09138">arXiv:2308.09138</a> <span> [<a href="https://arxiv.org/pdf/2308.09138">pdf</a>, <a href="https://arxiv.org/format/2308.09138">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Semantic Consistency for Assuring Reliability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Raj%2C+H">Harsh Raj</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+V">Vipul Gupta</a>, <a href="/search/cs?searchtype=author&query=Rosati%2C+D">Domenic Rosati</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.09138v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) exhibit remarkable fluency and competence across various natural language tasks. However, recent research has highlighted their sensitivity to variations in input prompts. To deploy LLMs in a safe and reliable manner, it is crucial for their outputs to be consistent when prompted with expressions that carry the same meaning or intent. 
While some existing work has explo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09138v1-abstract-full').style.display = 'inline'; document.getElementById('2308.09138v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.09138v1-abstract-full" style="display: none;"> Large Language Models (LLMs) exhibit remarkable fluency and competence across various natural language tasks. However, recent research has highlighted their sensitivity to variations in input prompts. To deploy LLMs in a safe and reliable manner, it is crucial for their outputs to be consistent when prompted with expressions that carry the same meaning or intent. While some existing work has explored how state-of-the-art LLMs address this issue, their evaluations have been confined to assessing lexical equality of single- or multi-word answers, overlooking the consistency of generative text sequences. For a more comprehensive understanding of the consistency of LLMs in open-ended text generation scenarios, we introduce a general measure of semantic consistency, and formulate multiple versions of this metric to evaluate the performance of various LLMs. Our proposal demonstrates significantly higher consistency and stronger correlation with human evaluations of output consistency than traditional metrics based on lexical consistency. Finally, we propose a novel prompting strategy, called Ask-to-Choose (A2C), to enhance semantic consistency. When evaluated for closed-book question answering based on answer variations from the TruthfulQA benchmark, A2C increases accuracy metrics for pretrained and finetuned LLMs by up to 47%, and semantic consistency metrics for instruction-tuned models by up to 7-fold. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09138v1-abstract-full').style.display = 'none'; document.getElementById('2308.09138v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06653">arXiv:2308.06653</a> <span> [<a href="https://arxiv.org/pdf/2308.06653">pdf</a>, <a href="https://arxiv.org/format/2308.06653">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Smart Knowledge Transfer using Google-like Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Das%2C+P+P">Partha Pratim Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.06653v1-abstract-short" style="display: inline;"> To address the issue of rising software maintenance cost due to program comprehension challenges, we propose SMARTKT (Smart Knowledge Transfer), a search framework, which extracts and integrates knowledge related to various aspects of an application in form of a semantic graph. 
This graph supports syntax and semantic queries and converts the process of program comprehension into a {\em google-like… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06653v1-abstract-full').style.display = 'inline'; document.getElementById('2308.06653v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06653v1-abstract-full" style="display: none;"> To address the issue of rising software maintenance cost due to program comprehension challenges, we propose SMARTKT (Smart Knowledge Transfer), a search framework, which extracts and integrates knowledge related to various aspects of an application in form of a semantic graph. This graph supports syntax and semantic queries and converts the process of program comprehension into a {\em google-like} search problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06653v1-abstract-full').style.display = 'none'; document.getElementById('2308.06653v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3 pages, 2 figures, accepted in the NDLI-UNESCO International Symposium on Knowledge Engineering for Digital Library Design 2019 (KEDL) as an extended abstract and poster</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.12915">arXiv:2307.12915</a> <span> [<a href="https://arxiv.org/pdf/2307.12915">pdf</a>, <a href="https://arxiv.org/format/2307.12915">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Consensus-based Participatory Budgeting for Legitimacy: Decision Support via Multi-agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Pournaras%2C+E">Evangelos Pournaras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.12915v1-abstract-short" style="display: inline;"> The legitimacy of bottom-up democratic processes for the distribution of public funds by policy-makers is challenging and complex. Participatory budgeting is such a process, where voting outcomes may not always be fair or inclusive. Deliberation for which project ideas to put for voting and choose for implementation lack systematization and do not scale. 
This paper addresses these grand challenges… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.12915v1-abstract-full').style.display = 'inline'; document.getElementById('2307.12915v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.12915v1-abstract-full" style="display: none;"> The legitimacy of bottom-up democratic processes for the distribution of public funds by policy-makers is challenging and complex. Participatory budgeting is such a process, where voting outcomes may not always be fair or inclusive. Deliberation for which project ideas to put for voting and choose for implementation lack systematization and do not scale. This paper addresses these grand challenges by introducing a novel and legitimate iterative consensus-based participatory budgeting process. Consensus is designed to be a result of decision support via an innovative multi-agent reinforcement learning approach. Voters are assisted to interact with each other to make viable compromises. Extensive experimental evaluation with real-world participatory budgeting data from Poland reveal striking findings: Consensus is reachable, efficient and robust. Compromise is required, which is though comparable to the one of existing voting aggregation methods that promote fairness and inclusion without though attaining consensus. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.12915v1-abstract-full').style.display = 'none'; document.getElementById('2307.12915v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 Pages, 8 Figures, 3 Tables, Accepted in International Conference on Machine Learning, Optimization, and Data Science, 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> International Conference on Machine Learning, Optimization, and Data Science, 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.08412">arXiv:2307.08412</a> <span> [<a href="https://arxiv.org/pdf/2307.08412">pdf</a>, <a href="https://arxiv.org/format/2307.08412">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> A Privacy-Preserving Blockchain-based E-voting System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mukherjee%2C+A">Arnab Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Souvik Majumdar</a>, <a href="/search/cs?searchtype=author&query=Kolya%2C+A+K">Anup Kumar Kolya</a>, <a href="/search/cs?searchtype=author&query=Nandi%2C+S">Saborni Nandi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.08412v1-abstract-short" style="display: inline;"> Within a modern democratic nation, elections play a significant role in the nation's functioning. 
However, with the existing infrastructure for conducting elections using Electronic Voting Systems (EVMs), many loopholes exist, which illegitimate entities might leverage to cast false votes or even tamper with the EVMs after the voting session is complete. The need of the hour is to introduce a robu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.08412v1-abstract-full').style.display = 'inline'; document.getElementById('2307.08412v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.08412v1-abstract-full" style="display: none;"> Within a modern democratic nation, elections play a significant role in the nation's functioning. However, with the existing infrastructure for conducting elections using Electronic Voting Systems (EVMs), many loopholes exist, which illegitimate entities might leverage to cast false votes or even tamper with the EVMs after the voting session is complete. The need of the hour is to introduce a robust, auditable, transparent, and tamper-proof e-voting system, enabling a more reliable and fair election process. To address such concerns, we propose a novel solution for blockchain-based e-voting, focusing on the security and privacy aspects of the e-voting process. We consider the security risks and loopholes and aim to preserve the anonymity of the voters while ensuring that illegitimate votes are properly handled. Additionally, we develop a prototype as a proof of concept using the Ethereum blockchain platform. Finally, we perform experiments to demonstrate the performance of the system. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.08412v1-abstract-full').style.display = 'none'; document.getElementById('2307.08412v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.13696">arXiv:2306.13696</a> <span> [<a href="https://arxiv.org/pdf/2306.13696">pdf</a>, <a href="https://arxiv.org/format/2306.13696">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3598469.3598472">10.1145/3598469.3598472 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Improving City Life via Legitimate and Participatory Policy-making: A Data-driven Approach in Switzerland </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wellings%2C+T">Thomas Wellings</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Fricker%2C+R+H">Regula Hänggli Fricker</a>, <a href="/search/cs?searchtype=author&query=Pournaras%2C+E">Evangelos Pournaras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.13696v1-abstract-short" style="display: inline;"> 
This paper introduces a novel data-driven approach to address challenges faced by city policymakers concerning the distribution of public funds. Providing budgeting processes for improving quality of life based on objective (data-driven) evidence has been so far a missing element in policy-making. This paper focuses on a case study of 1,204 citizens in the city of Aarau, Switzerland, and analyzes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13696v1-abstract-full').style.display = 'inline'; document.getElementById('2306.13696v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.13696v1-abstract-full" style="display: none;"> This paper introduces a novel data-driven approach to address challenges faced by city policymakers concerning the distribution of public funds. Providing budgeting processes for improving quality of life based on objective (data-driven) evidence has been so far a missing element in policy-making. This paper focuses on a case study of 1,204 citizens in the city of Aarau, Switzerland, and analyzes survey data containing insightful indicators that can impact the legitimacy of decision-making. Our approach is twofold. On the one hand, we aim to optimize the legitimacy of policymakers' decisions by identifying the level of investment in neighborhoods and projects that offer the greatest return in legitimacy. To do so, we introduce a new context-independent legitimacy metric for policymakers. This metric allows us to distinguish decisive vs. indecisive collective preferences for neighborhoods or projects on which to invest, enabling policymakers to prioritize impactful bottom-up consultations and participatory initiatives (e.g., participatory budgeting). The metric also allows policymakers to identify the optimal number of investments in various project sectors and neighborhoods (in terms of legitimacy gain). 
On the other hand, we aim to offer guidance to policymakers concerning which satisfaction and participation factors influence citizens' quality of life through an accurate classification model and an evaluation of relocations. By doing so, policymakers may be able to further refine their strategy, making targeted investments with significant benefits to citizens' quality of life. These findings are expected to provide transformative insights for practicing direct democracy in Switzerland and a blueprint for policy-making to adopt worldwide. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13696v1-abstract-full').style.display = 'none'; document.getElementById('2306.13696v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 15 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 24th Annual International Conference on Digital Government Research (dg.o 2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06283">arXiv:2306.06283</a> <span> [<a href="https://arxiv.org/pdf/2306.06283">pdf</a>, <a href="https://arxiv.org/format/2306.06283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" 
data-tooltip="Chemical Physics">physics.chem-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1039/D3DD00113J">10.1039/D3DD00113J <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> 14 Examples of How LLMs Can Transform Materials Science and Chemistry: A Reflection on a Large Language Model Hackathon </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jablonka%2C+K+M">Kevin Maik Jablonka</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Q">Qianxiang Ai</a>, <a href="/search/cs?searchtype=author&query=Al-Feghali%2C+A">Alexander Al-Feghali</a>, <a href="/search/cs?searchtype=author&query=Badhwar%2C+S">Shruti Badhwar</a>, <a href="/search/cs?searchtype=author&query=Bocarsly%2C+J+D">Joshua D. Bocarsly</a>, <a href="/search/cs?searchtype=author&query=Bran%2C+A+M">Andres M Bran</a>, <a href="/search/cs?searchtype=author&query=Bringuier%2C+S">Stefan Bringuier</a>, <a href="/search/cs?searchtype=author&query=Brinson%2C+L+C">L. Catherine Brinson</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+K">Kamal Choudhary</a>, <a href="/search/cs?searchtype=author&query=Circi%2C+D">Defne Circi</a>, <a href="/search/cs?searchtype=author&query=Cox%2C+S">Sam Cox</a>, <a href="/search/cs?searchtype=author&query=de+Jong%2C+W+A">Wibe A. de Jong</a>, <a href="/search/cs?searchtype=author&query=Evans%2C+M+L">Matthew L. Evans</a>, <a href="/search/cs?searchtype=author&query=Gastellu%2C+N">Nicolas Gastellu</a>, <a href="/search/cs?searchtype=author&query=Genzling%2C+J">Jerome Genzling</a>, <a href="/search/cs?searchtype=author&query=Gil%2C+M+V">María Victoria Gil</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A+K">Ankur K. 
Gupta</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Z">Zhi Hong</a>, <a href="/search/cs?searchtype=author&query=Imran%2C+A">Alishba Imran</a>, <a href="/search/cs?searchtype=author&query=Kruschwitz%2C+S">Sabine Kruschwitz</a>, <a href="/search/cs?searchtype=author&query=Labarre%2C+A">Anne Labarre</a>, <a href="/search/cs?searchtype=author&query=L%C3%A1la%2C+J">Jakub Lála</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tao Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Steven Ma</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sauradeep Majumdar</a> , et al. (28 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06283v4-abstract-short" style="display: inline;"> Large-language models (LLMs) such as GPT-4 caught the interest of many scientists. Recent studies suggested that these models could be useful in chemistry and materials science. To explore these possibilities, we organized a hackathon. This article chronicles the projects built as part of this hackathon. Participants employed LLMs for various applications, including predicting properties of mole… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06283v4-abstract-full').style.display = 'inline'; document.getElementById('2306.06283v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06283v4-abstract-full" style="display: none;"> Large-language models (LLMs) such as GPT-4 caught the interest of many scientists. Recent studies suggested that these models could be useful in chemistry and materials science. To explore these possibilities, we organized a hackathon. This article chronicles the projects built as part of this hackathon. 
Participants employed LLMs for various applications, including predicting properties of molecules and materials, designing novel interfaces for tools, extracting knowledge from unstructured data, and developing new educational applications. The diverse topics and the fact that working prototypes could be generated in less than two days highlight that LLMs will profoundly impact the future of our fields. The rich collection of ideas and projects also indicates that the applications of LLMs are not limited to materials science and chemistry but offer potential benefits to a wide range of scientific disciplines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06283v4-abstract-full').style.display = 'none'; document.getElementById('2306.06283v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16993">arXiv:2305.16993</a> <span> [<a href="https://arxiv.org/pdf/2305.16993">pdf</a>, <a href="https://arxiv.org/ps/2305.16993">ps</a>, <a href="https://arxiv.org/format/2305.16993">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Discrete-choice Multi-agent Optimization: Decentralized Hard Constraint Satisfaction for Smart Cities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Srijoni Majumdar</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+C">Chuhao Qin</a>, <a href="/search/cs?searchtype=author&query=Pournaras%2C+E">Evangelos Pournaras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16993v1-abstract-short" style="display: inline;"> Making Smart Cities more sustainable, resilient and democratic is emerging as an endeavor of satisfying hard constraints, for instance meeting net-zero targets. Decentralized multi-agent methods for socio-technical optimization of large-scale complex infrastructures such as energy and transport networks are scalable and more privacy-preserving by design. 
However, they mainly focus on satisfying so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16993v1-abstract-full').style.display = 'inline'; document.getElementById('2305.16993v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16993v1-abstract-full" style="display: none;"> Making Smart Cities more sustainable, resilient and democratic is emerging as an endeavor of satisfying hard constraints, for instance meeting net-zero targets. Decentralized multi-agent methods for socio-technical optimization of large-scale complex infrastructures such as energy and transport networks are scalable and more privacy-preserving by design. However, they mainly focus on satisfying soft constraints to remain cost-effective. This paper introduces a new model for decentralized hard constraint satisfaction in discrete-choice combinatorial optimization problems. The model solves the cold start problem of partial information for coordination during initialization that can violate hard constraints. It also preserves a low-cost satisfaction of hard constraints in subsequent coordinated choices during which soft constraints optimization is performed. Strikingly, experimental results in real-world Smart City application scenarios demonstrate the required behavioral shift to preserve optimality when hard constraints are satisfied. These findings are significant for policymakers, system operators, designers and architects to create the missing social capital of running cities in more viable trajectories. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16993v1-abstract-full').style.display = 'none'; document.getElementById('2305.16993v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 7 figures, Accepted for MSDM@AAMAS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.05084">arXiv:2305.05084</a> <span> [<a href="https://arxiv.org/pdf/2305.05084">pdf</a>, <a href="https://arxiv.org/format/2305.05084">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Fast Conformer with Linearly Scalable Attention for Efficient Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rekesh%2C+D">Dima Rekesh</a>, <a href="/search/cs?searchtype=author&query=Koluguri%2C+N+R">Nithin Rao Koluguri</a>, <a href="/search/cs?searchtype=author&query=Kriman%2C+S">Samuel Kriman</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Noroozi%2C+V">Vahid Noroozi</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">He Huang</a>, <a href="/search/cs?searchtype=author&query=Hrinchuk%2C+O">Oleksii Hrinchuk</a>, <a 
href="/search/cs?searchtype=author&query=Puvvada%2C+K">Krishna Puvvada</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Ankur Kumar</a>, <a href="/search/cs?searchtype=author&query=Balam%2C+J">Jagadeesh Balam</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.05084v6-abstract-short" style="display: inline;"> Conformer-based models have become the dominant end-to-end architecture for speech processing tasks. With the objective of enhancing the conformer architecture for efficient training and inference, we carefully redesigned Conformer with a novel downsampling schema. The proposed model, named Fast Conformer(FC), is 2.8x faster than the original Conformer, supports scaling to Billion parameters witho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05084v6-abstract-full').style.display = 'inline'; document.getElementById('2305.05084v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.05084v6-abstract-full" style="display: none;"> Conformer-based models have become the dominant end-to-end architecture for speech processing tasks. With the objective of enhancing the conformer architecture for efficient training and inference, we carefully redesigned Conformer with a novel downsampling schema. The proposed model, named Fast Conformer(FC), is 2.8x faster than the original Conformer, supports scaling to Billion parameters without any changes to the core architecture and also achieves state-of-the-art accuracy on Automatic Speech Recognition benchmarks. 
To enable transcription of long-form speech up to 11 hours, we replaced global attention with limited context attention post-training, while also improving accuracy through fine-tuning with the addition of a global token. Fast Conformer, when combined with a Transformer decoder also outperforms the original Conformer in accuracy and in speed for Speech Translation and Spoken Language Understanding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05084v6-abstract-full').style.display = 'none'; document.getElementById('2305.05084v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ASRU 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.07213">arXiv:2304.07213</a> <span> [<a href="https://arxiv.org/pdf/2304.07213">pdf</a>, <a href="https://arxiv.org/format/2304.07213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.rse.2023.113888">10.1016/j.rse.2023.113888 <i class="fa fa-external-link" 
aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Very high resolution canopy height maps from RGB imagery using self-supervised vision transformer and convolutional decoder trained on Aerial Lidar </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tolan%2C+J">Jamie Tolan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hung-I Yang</a>, <a href="/search/cs?searchtype=author&query=Nosarzewski%2C+B">Ben Nosarzewski</a>, <a href="/search/cs?searchtype=author&query=Couairon%2C+G">Guillaume Couairon</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+H">Huy Vo</a>, <a href="/search/cs?searchtype=author&query=Brandt%2C+J">John Brandt</a>, <a href="/search/cs?searchtype=author&query=Spore%2C+J">Justine Spore</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sayantan Majumdar</a>, <a href="/search/cs?searchtype=author&query=Haziza%2C+D">Daniel Haziza</a>, <a href="/search/cs?searchtype=author&query=Vamaraju%2C+J">Janaki Vamaraju</a>, <a href="/search/cs?searchtype=author&query=Moutakanni%2C+T">Theo Moutakanni</a>, <a href="/search/cs?searchtype=author&query=Bojanowski%2C+P">Piotr Bojanowski</a>, <a href="/search/cs?searchtype=author&query=Johns%2C+T">Tracy Johns</a>, <a href="/search/cs?searchtype=author&query=White%2C+B">Brian White</a>, <a href="/search/cs?searchtype=author&query=Tiecke%2C+T">Tobias Tiecke</a>, <a href="/search/cs?searchtype=author&query=Couprie%2C+C">Camille Couprie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.07213v3-abstract-short" style="display: inline;"> Vegetation structure mapping is critical for understanding the global carbon cycle and monitoring nature-based approaches to climate adaptation and mitigation. 
Repeated measurements of these data allow for the observation of deforestation or degradation of existing forests, natural forest regeneration, and the implementation of sustainable agricultural practices like agroforestry. Assessments of t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07213v3-abstract-full').style.display = 'inline'; document.getElementById('2304.07213v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.07213v3-abstract-full" style="display: none;"> Vegetation structure mapping is critical for understanding the global carbon cycle and monitoring nature-based approaches to climate adaptation and mitigation. Repeated measurements of these data allow for the observation of deforestation or degradation of existing forests, natural forest regeneration, and the implementation of sustainable agricultural practices like agroforestry. Assessments of tree canopy height and crown projected area at a high spatial resolution are also important for monitoring carbon fluxes and assessing tree-based land uses, since forest structures can be highly spatially heterogeneous, especially in agroforestry systems. Very high resolution satellite imagery (less than one meter (1m) Ground Sample Distance) makes it possible to extract information at the tree level while allowing monitoring at a very large scale. This paper presents the first high-resolution canopy height map concurrently produced for multiple sub-national jurisdictions. Specifically, we produce very high resolution canopy height maps for the states of California and Sao Paulo, a significant improvement in resolution over the ten meter (10m) resolution of previous Sentinel / GEDI based worldwide maps of canopy height. 
The maps are generated by the extraction of features from a self-supervised model trained on Maxar imagery from 2017 to 2020, and the training of a dense prediction decoder against aerial lidar maps. We also introduce a post-processing step using a convolutional network trained on GEDI observations. We evaluate the proposed maps with set-aside validation lidar data as well as by comparing with other remotely sensed maps and field-collected data, and find our model produces an average Mean Absolute Error (MAE) of 2.8 meters and Mean Error (ME) of 0.6 meters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07213v3-abstract-full').style.display = 'none'; document.getElementById('2304.07213v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Remote Sensing of Environment 300, 113888, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.06795">arXiv:2304.06795</a> <span> [<a href="https://arxiv.org/pdf/2304.06795">pdf</a>, <a href="https://arxiv.org/format/2304.06795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Efficient Sequence Transduction by Jointly Predicting Tokens and Durations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hainan Xu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+F">Fei Jia</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">He Huang</a>, <a href="/search/cs?searchtype=author&query=Watanabe%2C+S">Shinji Watanabe</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.06795v2-abstract-short" style="display: inline;"> This paper introduces a novel Token-and-Duration Transducer (TDT) architecture for sequence-to-sequence tasks. TDT extends conventional RNN-Transducer architectures by jointly predicting both a token and its duration, i.e. 
the number of input frames covered by the emitted token. This is achieved by using a joint network with two outputs which are independently normalized to generate distributions… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06795v2-abstract-full').style.display = 'inline'; document.getElementById('2304.06795v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.06795v2-abstract-full" style="display: none;"> This paper introduces a novel Token-and-Duration Transducer (TDT) architecture for sequence-to-sequence tasks. TDT extends conventional RNN-Transducer architectures by jointly predicting both a token and its duration, i.e. the number of input frames covered by the emitted token. This is achieved by using a joint network with two outputs which are independently normalized to generate distributions over tokens and durations. During inference, TDT models can skip input frames guided by the predicted duration output, which makes them significantly faster than conventional Transducers which process the encoder output frame by frame. TDT models achieve both better accuracy and significantly faster inference than conventional Transducers on different sequence transduction tasks. TDT models for Speech Recognition achieve better accuracy and up to 2.82X faster inference than conventional Transducers. TDT models for Speech Translation achieve an absolute gain of over 1 BLEU on the MUST-C test compared with conventional Transducers, and its inference is 2.27X faster. In Speech Intent Classification and Slot Filling tasks, TDT models improve the intent accuracy by up to over 1% (absolute) over conventional Transducers, while running up to 1.28X faster. Our implementation of the TDT model will be open-sourced with the NeMo (https://github.com/NVIDIA/NeMo) toolkit. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06795v2-abstract-full').style.display = 'none'; document.getElementById('2304.06795v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08535">arXiv:2303.08535</a> <span> [<a href="https://arxiv.org/pdf/2303.08535">pdf</a>, <a href="https://arxiv.org/format/2303.08535">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1088/1751-8121/acd829">10.1088/1751-8121/acd829 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Singular relaxation of a random walk in a box with a Metropolis Monte Carlo dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chepelianskii%2C+A+D">Alexei D. Chepelianskii</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S+N">Satya N. 
Majumdar</a>, <a href="/search/cs?searchtype=author&query=Schawe%2C+H">Hendrik Schawe</a>, <a href="/search/cs?searchtype=author&query=Trizac%2C+E">Emmanuel Trizac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08535v1-abstract-short" style="display: inline;"> We study analytically the relaxation eigenmodes of a simple Monte Carlo algorithm, corresponding to a particle in a box which moves by uniform random jumps. Moves outside of the box are rejected. At long times, the system approaches the equilibrium probability density, which is uniform inside the box. We show that the relaxation towards this equilibrium is unusual: for a jump length comparable to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08535v1-abstract-full').style.display = 'inline'; document.getElementById('2303.08535v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08535v1-abstract-full" style="display: none;"> We study analytically the relaxation eigenmodes of a simple Monte Carlo algorithm, corresponding to a particle in a box which moves by uniform random jumps. Moves outside of the box are rejected. At long times, the system approaches the equilibrium probability density, which is uniform inside the box. We show that the relaxation towards this equilibrium is unusual: for a jump length comparable to the size of the box, the number of relaxation eigenmodes can be surprisingly small, one or two. We provide a complete analytic description of the transition between these two regimes. When only a single relaxation eigenmode is present, a suitable choice of the symmetry of the initial conditions gives a localizing decay to equilibrium. 
In this case, the deviation from equilibrium concentrates at the edges of the box where the rejection probability is maximal. Finally, in addition to the relaxation analysis of the master equation, we also describe the full eigen-spectrum of the master equation including its sub-leading eigen-modes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08535v1-abstract-full').style.display = 'none'; document.getElementById('2303.08535v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.13419">arXiv:2211.13419</a> <span> [<a href="https://arxiv.org/pdf/2211.13419">pdf</a>, <a href="https://arxiv.org/format/2211.13419">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Network Security Modelling with Distributional Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a>, <a href="/search/cs?searchtype=author&query=Subramaniam%2C+G">Ganesh Subramaniam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.13419v1-abstract-short" style="display: inline;"> We investigate the detection of botnet 
command and control (C2) hosts in massive IP traffic using machine learning methods. To this end, we use NetFlow data -- the industry standard for monitoring of IP traffic -- and ML models using two sets of features: conventional NetFlow variables and distributional features based on NetFlow variables. In addition to using static summaries of NetFlow features… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.13419v1-abstract-full').style.display = 'inline'; document.getElementById('2211.13419v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.13419v1-abstract-full" style="display: none;"> We investigate the detection of botnet command and control (C2) hosts in massive IP traffic using machine learning methods. To this end, we use NetFlow data -- the industry standard for monitoring of IP traffic -- and ML models using two sets of features: conventional NetFlow variables and distributional features based on NetFlow variables. In addition to using static summaries of NetFlow features, we use quantiles of their IP-level distributions as input features in predictive models to predict whether an IP belongs to known botnet families. These models are used to develop intrusion detection systems to predict traffic traces identified with malicious attacks. The results are validated by matching predictions to existing denylists of published malicious IP addresses and deep packet inspection. The usage of our proposed novel distributional features, combined with techniques that enable modelling complex input feature spaces result in highly accurate predictions by our trained models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.13419v1-abstract-full').style.display = 'none'; document.getElementById('2211.13419v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted and presented in CAMLIS 2022, https://www.camlis.org/2022-conference. arXiv admin note: text overlap with arXiv:2108.08924</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.05853">arXiv:2211.05853</a> <span> [<a href="https://arxiv.org/pdf/2211.05853">pdf</a>, <a href="https://arxiv.org/ps/2211.05853">ps</a>, <a href="https://arxiv.org/format/2211.05853">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Measuring Reliability of Large Language Models through Semantic Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Raj%2C+H">Harsh Raj</a>, <a href="/search/cs?searchtype=author&query=Rosati%2C+D">Domenic Rosati</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2211.05853v2-abstract-short" style="display: inline;"> While large pretrained language models (PLMs) demonstrate incredible fluency and performance on many natural language tasks, recent work has shown that well-performing PLMs are very sensitive to what prompts are fed into them. Even when prompts are semantically identical, language models may give very different answers. When considering safe and trustworthy deployments of PLMs we would like their… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.05853v2-abstract-full').style.display = 'inline'; document.getElementById('2211.05853v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.05853v2-abstract-full" style="display: none;"> While large pretrained language models (PLMs) demonstrate incredible fluency and performance on many natural language tasks, recent work has shown that well-performing PLMs are very sensitive to what prompts are fed into them. Even when prompts are semantically identical, language models may give very different answers. When considering safe and trustworthy deployments of PLMs we would like their outputs to be consistent under prompts that mean the same thing or convey the same intent. While some work has looked into how state-of-the-art PLMs address this need, they have been limited to only evaluating lexical equality of single- or multi-word answers and do not address consistency of generative text sequences. In order to understand consistency of PLMs under text generation settings, we develop a measure of semantic consistency that allows the comparison of open-ended text outputs. 
We implement several versions of this consistency metric to evaluate the performance of a number of PLMs on paraphrased versions of questions in the TruthfulQA dataset. We find that our proposed metrics are considerably more consistent than traditional metrics embodying lexical consistency, and also correlate with human evaluation of output consistency to a higher degree. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.05853v2-abstract-full').style.display = 'none'; document.getElementById('2211.05853v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022 ML Safety Workshop, https://neurips2022.mlsafety.org</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.04568">arXiv:2211.04568</a> <span> [<a href="https://arxiv.org/pdf/2211.04568">pdf</a>, <a href="https://arxiv.org/ps/2211.04568">ps</a>, <a href="https://arxiv.org/format/2211.04568">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Algorithmic Fairness in Space-Time: Filling in Black Holes </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Flynn%2C+C">Cheryl Flynn</a>, <a href="/search/cs?searchtype=author&query=Guha%2C+A">Aritra Guha</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a>, <a href="/search/cs?searchtype=author&query=Srivastava%2C+D">Divesh Srivastava</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zhengyi Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.04568v1-abstract-short" style="display: inline;"> New technologies and the availability of geospatial data have drawn attention to spatio-temporal biases present in society. For example: the COVID-19 pandemic highlighted disparities in the availability of broadband service and its role in the digital divide; the environmental justice movement in the United States has raised awareness to health implications for minority populations stemming from h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04568v1-abstract-full').style.display = 'inline'; document.getElementById('2211.04568v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.04568v1-abstract-full" style="display: none;"> New technologies and the availability of geospatial data have drawn attention to spatio-temporal biases present in society. For example: the COVID-19 pandemic highlighted disparities in the availability of broadband service and its role in the digital divide; the environmental justice movement in the United States has raised awareness to health implications for minority populations stemming from historical redlining practices; and studies have found varying quality and coverage in the collection and sharing of open-source geospatial data. 
Despite the extensive literature on machine learning (ML) fairness, few algorithmic strategies have been proposed to mitigate such biases. In this paper we highlight the unique challenges for quantifying and addressing spatio-temporal biases, through the lens of use cases presented in the scientific literature and media. We envision a roadmap of ML strategies that need to be developed or adapted to quantify and overcome these challenges -- including transfer learning, active learning, and reinforcement learning techniques. Further, we discuss the potential role of ML in providing guidance to policy makers on issues related to spatial fairness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04568v1-abstract-full').style.display = 'none'; document.getElementById('2211.04568v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03541">arXiv:2211.03541</a> <span> [<a href="https://arxiv.org/pdf/2211.03541">pdf</a>, <a href="https://arxiv.org/format/2211.03541">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Multi-blank Transducers for Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hainan Xu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+F">Fei Jia</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Watanabe%2C+S">Shinji Watanabe</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03541v2-abstract-short" style="display: inline;"> This paper proposes a modification to RNN-Transducer (RNN-T) models for automatic speech recognition (ASR). In standard RNN-T, the emission of a blank symbol consumes exactly one input frame; in our proposed method, we introduce additional blank symbols, which consume two or more input frames when emitted. We refer to the added symbols as big blanks, and the method multi-blank RNN-T. 
For training… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03541v2-abstract-full').style.display = 'inline'; document.getElementById('2211.03541v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03541v2-abstract-full" style="display: none;"> This paper proposes a modification to RNN-Transducer (RNN-T) models for automatic speech recognition (ASR). In standard RNN-T, the emission of a blank symbol consumes exactly one input frame; in our proposed method, we introduce additional blank symbols, which consume two or more input frames when emitted. We refer to the added symbols as big blanks, and the method multi-blank RNN-T. For training multi-blank RNN-Ts, we propose a novel logit under-normalization method in order to prioritize emissions of big blanks. With experiments on multiple languages and datasets, we show that multi-blank RNN-T methods could bring relative speedups of over +90%/+139% to model inference for English Librispeech and German Multilingual Librispeech datasets, respectively. The multi-blank RNN-T method also improves ASR accuracy consistently. We will release our implementation of the method in the NeMo (https://github.com/NVIDIA/NeMo) toolkit. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03541v2-abstract-full').style.display = 'none'; document.getElementById('2211.03541v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICASSP 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.16780">arXiv:2210.16780</a> <span> [<a href="https://arxiv.org/pdf/2210.16780">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Recognizing Handwriting Styles in a Historical Scanned Document Using Unsupervised Fuzzy Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sriparna Majumdar</a>, <a href="/search/cs?searchtype=author&query=Brick%2C+A">Aaron Brick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.16780v2-abstract-short" style="display: inline;"> The forensic attribution of the handwriting in a digitized document to multiple scribes is a challenging problem of high dimensionality. Unique handwriting styles may be dissimilar in a blend of several factors including character size, stroke width, loops, ductus, slant angles, and cursive ligatures. 
Previous work on labeled data with Hidden Markov models, support vector machines, and semi-superv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.16780v2-abstract-full').style.display = 'inline'; document.getElementById('2210.16780v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.16780v2-abstract-full" style="display: none;"> The forensic attribution of the handwriting in a digitized document to multiple scribes is a challenging problem of high dimensionality. Unique handwriting styles may be dissimilar in a blend of several factors including character size, stroke width, loops, ductus, slant angles, and cursive ligatures. Previous work on labeled data with Hidden Markov models, support vector machines, and semi-supervised recurrent neural networks have provided moderate to high success. In this study, we successfully detect hand shifts in a historical manuscript through fuzzy soft clustering in combination with linear principal component analysis. This advance demonstrates the successful deployment of unsupervised methods for writer attribution of historical documents and forensic document analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.16780v2-abstract-full').style.display = 'none'; document.getElementById('2210.16780v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages in total, 5 figures and 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.03255">arXiv:2210.03255</a> <span> [<a href="https://arxiv.org/pdf/2210.03255">pdf</a>, <a href="https://arxiv.org/format/2210.03255">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Damage Control During Domain Adaptation for Transducer Based Automatic Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somshubra Majumdar</a>, <a href="/search/cs?searchtype=author&query=Acharya%2C+S">Shantanu Acharya</a>, <a href="/search/cs?searchtype=author&query=Lavrukhin%2C+V">Vitaly Lavrukhin</a>, <a href="/search/cs?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.03255v1-abstract-short" style="display: inline;"> Automatic speech recognition models are often adapted to improve their accuracy in a new domain. A potential drawback of model adaptation to new domains is catastrophic forgetting, where the Word Error Rate on the original domain is significantly degraded. 
This paper addresses the situation when we want to simultaneously adapt automatic speech recognition models to a new domain and limit the degra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03255v1-abstract-full').style.display = 'inline'; document.getElementById('2210.03255v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.03255v1-abstract-full" style="display: none;"> Automatic speech recognition models are often adapted to improve their accuracy in a new domain. A potential drawback of model adaptation to new domains is catastrophic forgetting, where the Word Error Rate on the original domain is significantly degraded. This paper addresses the situation when we want to simultaneously adapt automatic speech recognition models to a new domain and limit the degradation of accuracy on the original domain without access to the original training dataset. We propose several techniques such as a limited training strategy and regularized adapter modules for the Transducer encoder, prediction, and joiner network. We apply these methods to the Google Speech Commands and to the UK and Ireland English Dialect speech data set and obtain strong results on the new target domain while limiting the degradation on the original domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03255v1-abstract-full').style.display = 'none'; document.getElementById('2210.03255v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in Proc. 
SLT 2022, Jan 09-12, 2023, Doha, Qatar</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.00498">arXiv:2208.00498</a> <span> [<a href="https://arxiv.org/pdf/2208.00498">pdf</a>, <a href="https://arxiv.org/format/2208.00498">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DNNShield: Dynamic Randomized Model Sparsification, A Defense Against Adversarial Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Samavatian%2C+M+H">Mohammad Hossein Samavatian</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Saikat Majumdar</a>, <a href="/search/cs?searchtype=author&query=Barber%2C+K">Kristin Barber</a>, <a href="/search/cs?searchtype=author&query=Teodorescu%2C+R">Radu Teodorescu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.00498v1-abstract-short" style="display: inline;"> DNNs are known to be vulnerable to so-called adversarial attacks that manipulate inputs to cause incorrect results that can be beneficial to an attacker or damaging to the victim. Recent works have proposed approximate computation as a defense mechanism against machine learning attacks. 
We show that these approaches, while successful for a range of inputs, are insufficient to address stronger, hig… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.00498v1-abstract-full').style.display = 'inline'; document.getElementById('2208.00498v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.00498v1-abstract-full" style="display: none;"> DNNs are known to be vulnerable to so-called adversarial attacks that manipulate inputs to cause incorrect results that can be beneficial to an attacker or damaging to the victim. Recent works have proposed approximate computation as a defense mechanism against machine learning attacks. We show that these approaches, while successful for a range of inputs, are insufficient to address stronger, high-confidence adversarial attacks. To address this, we propose DNNSHIELD, a hardware-accelerated defense that adapts the strength of the response to the confidence of the adversarial input. Our approach relies on dynamic and random sparsification of the DNN model to achieve inference approximation efficiently and with fine-grain control over the approximation error. DNNSHIELD uses the output distribution characteristics of sparsified inference compared to a dense reference to detect adversarial inputs. We show an adversarial detection rate of 86% when applied to VGG16 and 88% when applied to ResNet50, which exceeds the detection rate of the state of the art approaches, with a much lower overhead. We demonstrate a software/hardware-accelerated FPGA prototype, which reduces the performance impact of DNNSHIELD relative to software-only CPU and GPU implementations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.00498v1-abstract-full').style.display = 'none'; document.getElementById('2208.00498v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.10488">arXiv:2207.10488</a> <span> [<a href="https://arxiv.org/pdf/2207.10488">pdf</a>, <a href="https://arxiv.org/format/2207.10488">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1088/1742-5468/ad002d">10.1088/1742-5468/ad002d <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Metropolis Monte Carlo sampling: convergence, localization transition and optimality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chepelianskii%2C+A+D">Alexei D. Chepelianskii</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S+N">Satya N. 
Majumdar</a>, <a href="/search/cs?searchtype=author&query=Schawe%2C+H">Hendrik Schawe</a>, <a href="/search/cs?searchtype=author&query=Trizac%2C+E">Emmanuel Trizac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.10488v4-abstract-short" style="display: inline;"> Among random sampling methods, Markov Chain Monte Carlo algorithms are foremost. Using a combination of analytical and numerical approaches, we study their convergence properties towards the steady state, within a random walk Metropolis scheme. Analysing the relaxation properties of some model algorithms sufficiently simple to enable analytic progress, we show that the deviations from the target s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.10488v4-abstract-full').style.display = 'inline'; document.getElementById('2207.10488v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.10488v4-abstract-full" style="display: none;"> Among random sampling methods, Markov Chain Monte Carlo algorithms are foremost. Using a combination of analytical and numerical approaches, we study their convergence properties towards the steady state, within a random walk Metropolis scheme. Analysing the relaxation properties of some model algorithms sufficiently simple to enable analytic progress, we show that the deviations from the target steady-state distribution can feature a localization transition as a function of the characteristic length of the attempted jumps defining the random walk. 
While the iteration of the Monte Carlo algorithm converges to equilibrium for all choices of jump parameters, the localization transition changes drastically the asymptotic shape of the difference between the probability distribution reached after a finite number of steps of the algorithm and the target equilibrium distribution. We argue that the relaxation before and after the localisation transition is respectively limited by diffusion and rejection rates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.10488v4-abstract-full').style.display = 'none'; document.getElementById('2207.10488v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Statistical Mechanics 123205, (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07783">arXiv:2207.07783</a> <span> [<a href="https://arxiv.org/pdf/2207.07783">pdf</a>, <a href="https://arxiv.org/format/2207.07783">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Long-Term Spatial-Temporal Graphs for Active Speaker Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Min%2C+K">Kyle Min</a>, <a href="/search/cs?searchtype=author&query=Roy%2C+S">Sourya Roy</a>, <a href="/search/cs?searchtype=author&query=Tripathi%2C+S">Subarna Tripathi</a>, <a href="/search/cs?searchtype=author&query=Guha%2C+T">Tanaya Guha</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somdeb Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07783v3-abstract-short" style="display: inline;"> Active speaker detection (ASD) in videos with multiple speakers is a challenging task as it requires learning effective audiovisual features and spatial-temporal correlations over long temporal windows. In this paper, we present SPELL, a novel spatial-temporal graph learning framework that can solve complex tasks such as ASD. 
To this end, each person in a video frame is first encoded in a unique n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07783v3-abstract-full').style.display = 'inline'; document.getElementById('2207.07783v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07783v3-abstract-full" style="display: none;"> Active speaker detection (ASD) in videos with multiple speakers is a challenging task as it requires learning effective audiovisual features and spatial-temporal correlations over long temporal windows. In this paper, we present SPELL, a novel spatial-temporal graph learning framework that can solve complex tasks such as ASD. To this end, each person in a video frame is first encoded in a unique node for that frame. Nodes corresponding to a single person across frames are connected to encode their temporal dynamics. Nodes within a frame are also connected to encode inter-person relationships. Thus, SPELL reduces ASD to a node classification task. Importantly, SPELL is able to reason over long temporal contexts for all nodes without relying on computationally expensive fully connected graph neural networks. Through extensive experiments on the AVA-ActiveSpeaker dataset, we demonstrate that learning graph-based representations can significantly improve the active speaker detection performance owing to its explicit spatial and temporal structure. SPELL outperforms all previous state-of-the-art approaches while requiring significantly lower memory and computational resources. 
Our code is publicly available at https://github.com/SRA2/SPELL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07783v3-abstract-full').style.display = 'none'; document.getElementById('2207.07783v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2022 camera ready (Supplementary videos: on ECVA soon). This paper supersedes arXiv:2112.01479</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.13046">arXiv:2206.13046</a> <span> [<a href="https://arxiv.org/pdf/2206.13046">pdf</a>, <a href="https://arxiv.org/format/2206.13046">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DPOAD: Differentially Private Outsourcing of Anomaly Detection through Iterative Sensitivity Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mohammady%2C+M">Meisam Mohammady</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Han Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lingyu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mengyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Jarraya%2C+Y">Yosr 
Jarraya</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Suryadipta Majumdar</a>, <a href="/search/cs?searchtype=author&query=Pourzandi%2C+M">Makan Pourzandi</a>, <a href="/search/cs?searchtype=author&query=Debbabi%2C+M">Mourad Debbabi</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yuan Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.13046v1-abstract-short" style="display: inline;"> Outsourcing anomaly detection to third-parties can allow data owners to overcome resource constraints (e.g., in lightweight IoT devices), facilitate collaborative analysis (e.g., under distributed or multi-party scenarios), and benefit from lower costs and specialized expertise (e.g., of Managed Security Service Providers). Despite such benefits, a data owner may feel reluctant to outsource anomal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.13046v1-abstract-full').style.display = 'inline'; document.getElementById('2206.13046v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.13046v1-abstract-full" style="display: none;"> Outsourcing anomaly detection to third-parties can allow data owners to overcome resource constraints (e.g., in lightweight IoT devices), facilitate collaborative analysis (e.g., under distributed or multi-party scenarios), and benefit from lower costs and specialized expertise (e.g., of Managed Security Service Providers). Despite such benefits, a data owner may feel reluctant to outsource anomaly detection without sufficient privacy protection. To that end, most existing privacy solutions would face a novel challenge, i.e., preserving privacy usually requires the difference between data entries to be eliminated or reduced, whereas anomaly detection critically depends on that difference. 
Such a conflict is recently resolved under a local analysis setting with trusted analysts (where no outsourcing is involved) through moving the focus of differential privacy (DP) guarantee from "all" to only "benign" entries. In this paper, we observe that such an approach is not directly applicable to the outsourcing setting, because data owners do not know which entries are "benign" prior to outsourcing, and hence cannot selectively apply DP on data entries. Therefore, we propose a novel iterative solution for the data owner to gradually "disentangle" the anomalous entries from the benign ones such that the third-party analyst can produce accurate anomaly results with sufficient DP guarantee. We design and implement our Differentially Private Outsourcing of Anomaly Detection (DPOAD) framework, and demonstrate its benefits over baseline Laplace and PainFree mechanisms through experiments with real data from different application domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.13046v1-abstract-full').style.display = 'none'; document.getElementById('2206.13046v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.05391">arXiv:2206.05391</a> <span> [<a href="https://arxiv.org/pdf/2206.05391">pdf</a>, <a href="https://arxiv.org/format/2206.05391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Feature Selection using e-values </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Subhabrata Majumdar</a>, <a href="/search/cs?searchtype=author&query=Chatterjee%2C+S">Snigdhansu Chatterjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.05391v2-abstract-short" style="display: inline;"> In the context of supervised parametric models, we introduce the concept of e-values. An e-value is a scalar quantity that represents the proximity of the sampling distribution of parameter estimates in a model trained on a subset of features to that of the model trained on all features (i.e. the full model). 
Under general conditions, a rank ordering of e-values separates models that contain all e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05391v2-abstract-full').style.display = 'inline'; document.getElementById('2206.05391v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.05391v2-abstract-full" style="display: none;"> In the context of supervised parametric models, we introduce the concept of e-values. An e-value is a scalar quantity that represents the proximity of the sampling distribution of parameter estimates in a model trained on a subset of features to that of the model trained on all features (i.e. the full model). Under general conditions, a rank ordering of e-values separates models that contain all essential features from those that do not. The e-values are applicable to a wide range of parametric models. We use data depths and a fast resampling-based algorithm to implement a feature selection procedure using e-values, providing consistency results. For a $p$-dimensional feature space, this procedure requires fitting only the full model and evaluating $p+1$ models, as opposed to the traditional requirement of fitting and evaluating $2^p$ models. Through experiments across several model settings and synthetic and real datasets, we establish that the e-values method is a promising general alternative to existing model-specific methods of feature selection. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05391v2-abstract-full').style.display = 'none'; document.getElementById('2206.05391v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted in ICML-2022</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 39th International Conference on Machine Learning, PMLR 162:14753-14773, 2022, https://proceedings.mlr.press/v162/majumdar22a.html </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.06386">arXiv:2204.06386</a> <span> [<a href="https://arxiv.org/pdf/2204.06386">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Efficient Deep Neural Network Accelerator Using Controlled Ferroelectric Domain Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sayani Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.06386v4-abstract-short" style="display: inline;"> The current 
work reports an efficient deep neural network (DNN) accelerator where synaptic weight elements are controlled by ferroelectric domain dynamics. An integrated device-to-algorithm framework for benchmarking novel synaptic devices is used. In P(VDF-TrFE) based ferroelectric tunnel junctions, analog conductance states are measured using a custom pulsing protocol and associated custom circu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.06386v4-abstract-full').style.display = 'inline'; document.getElementById('2204.06386v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.06386v4-abstract-full" style="display: none;"> The current work reports an efficient deep neural network (DNN) accelerator where synaptic weight elements are controlled by ferroelectric domain dynamics. An integrated device-to-algorithm framework for benchmarking novel synaptic devices is used. In P(VDF-TrFE) based ferroelectric tunnel junctions, analog conductance states are measured using a custom pulsing protocol and associated custom circuits and array architectures for DNN training is simulated. Our results show precise control of polarization switching dynamics in multi-domain, polycrystalline ferroelectric thin films can produce considerable weight update linearity in metal-ferroelectric-semiconductor (MFS) tunnel junctions. Ultrafast switching and low junction current in these devices offer extremely energy efficient operation. Through an integrated platform of hardware development, characterization and modelling, we predict the available conductance range where linearity is expected under identical potentiating and depressing pulses for efficient DNN training and inference tasks. 
As an example, an analog crossbar based DNN accelerator with MFS junctions as synaptic weight elements showed ~ 93% training accuracy on large MNIST handwritten digit dataset while for cropped images, a 95% accuracy is achieved. One observed challenge is rather limited dynamic conductance range while operating under identical potentiating and depressing pulses below 1V. Investigation is underway for improving the dynamic conductance range without losing the weight update linearity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.06386v4-abstract-full').style.display = 'none'; document.getElementById('2204.06386v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.01696">arXiv:2204.01696</a> <span> [<a href="https://arxiv.org/pdf/2204.01696">pdf</a>, <a href="https://arxiv.org/format/2204.01696">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Joint Hand Motion and Interaction Hotspots Prediction from Egocentric Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaowei Liu</a>, <a href="/search/cs?searchtype=author&query=Tripathi%2C+S">Subarna Tripathi</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somdeb Majumdar</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaolong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.01696v1-abstract-short" style="display: inline;"> We propose to forecast future hand-object interactions given an egocentric video. Instead of predicting action labels or pixels, we directly predict the hand motion trajectory and the future contact points on the next active object (i.e., interaction hotspots). This relatively low-dimensional representation provides a concrete description of future interactions. 
To tackle this task, we first provi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.01696v1-abstract-full').style.display = 'inline'; document.getElementById('2204.01696v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.01696v1-abstract-full" style="display: none;"> We propose to forecast future hand-object interactions given an egocentric video. Instead of predicting action labels or pixels, we directly predict the hand motion trajectory and the future contact points on the next active object (i.e., interaction hotspots). This relatively low-dimensional representation provides a concrete description of future interactions. To tackle this task, we first provide an automatic way to collect trajectory and hotspots labels on large-scale data. We then use this data to train an Object-Centric Transformer (OCT) model for prediction. Our model performs hand and object interaction reasoning via the self-attention mechanism in Transformers. OCT also provides a probabilistic framework to sample the future trajectory and hotspots to handle uncertainty in prediction. We perform experiments on the Epic-Kitchens-55, Epic-Kitchens-100, and EGTEA Gaze+ datasets, and show that OCT significantly outperforms state-of-the-art approaches by a large margin. Project page is available at https://stevenlsw.github.io/hoi-forecast . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.01696v1-abstract-full').style.display = 'none'; document.getElementById('2204.01696v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2022, Project page: https://stevenlsw.github.io/hoi-forecast</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.09828">arXiv:2112.09828</a> <span> [<a href="https://arxiv.org/pdf/2112.09828">pdf</a>, <a href="https://arxiv.org/format/2112.09828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Long-Term Dependencies for Generating Dynamic Scene Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+S">Shengyu Feng</a>, <a href="/search/cs?searchtype=author&query=Tripathi%2C+S">Subarna Tripathi</a>, <a href="/search/cs?searchtype=author&query=Mostafa%2C+H">Hesham Mostafa</a>, <a href="/search/cs?searchtype=author&query=Nassar%2C+M">Marcel Nassar</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somdeb Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.09828v2-abstract-short" style="display: inline;"> Dynamic scene graph generation from a video is challenging due to the temporal dynamics of the scene and the inherent temporal fluctuations of predictions. We hypothesize that capturing long-term temporal dependencies is the key to effective generation of dynamic scene graphs. 
We propose to learn the long-term dependencies in a video by capturing the object-level consistency and inter-object relat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09828v2-abstract-full').style.display = 'inline'; document.getElementById('2112.09828v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.09828v2-abstract-full" style="display: none;"> Dynamic scene graph generation from a video is challenging due to the temporal dynamics of the scene and the inherent temporal fluctuations of predictions. We hypothesize that capturing long-term temporal dependencies is the key to effective generation of dynamic scene graphs. We propose to learn the long-term dependencies in a video by capturing the object-level consistency and inter-object relationship dynamics over object-level long-term tracklets using transformers. Experimental results demonstrate that our Dynamic Scene Graph Detection Transformer (DSG-DETR) outperforms state-of-the-art methods by a significant margin on the benchmark dataset Action Genome. Our ablation studies validate the effectiveness of each component of the proposed approach. The source code is available at https://github.com/Shengyu-Feng/DSG-DETR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09828v2-abstract-full').style.display = 'none'; document.getElementById('2112.09828v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WACV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.01479">arXiv:2112.01479</a> <span> [<a href="https://arxiv.org/pdf/2112.01479">pdf</a>, <a href="https://arxiv.org/format/2112.01479">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Spatial-Temporal Graphs for Active Speaker Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Roy%2C+S">Sourya Roy</a>, <a href="/search/cs?searchtype=author&query=Min%2C+K">Kyle Min</a>, <a href="/search/cs?searchtype=author&query=Tripathi%2C+S">Subarna Tripathi</a>, <a href="/search/cs?searchtype=author&query=Guha%2C+T">Tanaya Guha</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Somdeb Majumdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.01479v2-abstract-short" style="display: inline;"> We address the problem of active speaker detection through a new framework, called SPELL, that learns long-range multimodal graphs to encode the inter-modal relationship between audio and visual data. We cast active speaker detection as a node classification task that is aware of longer-term dependencies. We first construct a graph from a video so that each node corresponds to one person. 
Nodes re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01479v2-abstract-full').style.display = 'inline'; document.getElementById('2112.01479v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.01479v2-abstract-full" style="display: none;"> We address the problem of active speaker detection through a new framework, called SPELL, that learns long-range multimodal graphs to encode the inter-modal relationship between audio and visual data. We cast active speaker detection as a node classification task that is aware of longer-term dependencies. We first construct a graph from a video so that each node corresponds to one person. Nodes representing the same identity share edges between them within a defined temporal window. Nodes within the same video frame are also connected to encode inter-person interactions. Through extensive experiments on the Ava-ActiveSpeaker dataset, we demonstrate that learning graph-based representation, owing to its explicit spatial and temporal structure, significantly improves the overall performance. SPELL outperforms several relevant baselines and performs at par with state of the art models while requiring an order of magnitude lower computation cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01479v2-abstract-full').style.display = 'none'; document.getElementById('2112.01479v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Majumdar%2C+S&start=50" class="pagination-next">Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Majumdar%2C+S&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Majumdar%2C+S&start=50" class="pagination-link" aria-label="Page 2">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 
40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 
35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>