Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 601 results for author: <span class="mathjax">Choi, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Choi%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Choi, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Choi%2C+Y&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Choi, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Choi%2C+Y&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Choi%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Choi%2C+Y&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Choi%2C+Y&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Choi%2C+Y&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Choi%2C+Y&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09993">arXiv:2502.09993</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09993">pdf</a>, <a href="https://arxiv.org/format/2502.09993">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Navigating Label Ambiguity for Facial Expression Recognition in the Wild </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">JunGyu Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yeji Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Haksub Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+I">Ig-Jae Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Nam%2C+G+P">Gi Pyo Nam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09993v1-abstract-short" style="display: inline;"> Facial expression recognition (FER) remains a challenging task due to label ambiguity caused by the subjective nature of facial expressions and noisy samples. Additionally, class imbalance, which is common in real-world datasets, further complicates FER. Although many studies have shown impressive improvements, they typically address only one of these issues, leading to suboptimal results. 
Abstract: Facial expression recognition (FER) remains a challenging task due to label ambiguity caused by the subjective nature of facial expressions and noisy samples. Additionally, class imbalance, which is common in real-world datasets, further complicates FER. Although many studies have shown impressive improvements, they typically address only one of these issues, leading to suboptimal results. To tackle both challenges simultaneously, we propose a novel framework called Navigating Label Ambiguity (NLA), which is robust under real-world conditions. The motivation behind NLA is that dynamically estimating and emphasizing ambiguous samples at each iteration helps mitigate noise and class imbalance by reducing the model's bias toward majority classes. To achieve this, NLA consists of two main components: Noise-aware Adaptive Weighting (NAW) and consistency regularization. Specifically, NAW adaptively assigns higher importance to ambiguous samples and lower importance to noisy ones, based on the correlation between the intermediate prediction scores for the ground truth and the nearest negative. Moreover, we incorporate a regularization term to ensure consistent latent distributions. Consequently, NLA enables the model to progressively focus on more challenging ambiguous samples, which primarily belong to the minority class, in the later stages of training. Extensive experiments demonstrate that NLA outperforms existing methods in both overall and mean accuracy, confirming its robustness against noise and class imbalance. To the best of our knowledge, this is the first framework to address both problems simultaneously.
Submitted 14 February, 2025; originally announced February 2025.
Comments: Accepted by AAAI 2025

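The abstract pins NAW down only as far as "the correlation between the intermediate prediction scores for the ground truth and the nearest negative"; the exact weighting function is not given. Below is a minimal NumPy sketch of one plausible reading, in which the Gaussian form, the sigma value, and the name naw_weights are illustrative assumptions, not the paper's definition:

```python
import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def naw_weights(logits, labels, sigma=0.25):
    """Hypothetical noise-aware adaptive weighting (illustrative only).

    gap = p(ground truth) - p(nearest negative):
      gap ~ 0  -> ambiguous sample   -> weight near 1
      gap << 0 -> likely label noise -> weight near 0
      gap >> 0 -> easy sample        -> small weight (already learned)
    """
    p = softmax(logits)                 # intermediate prediction scores (N, C)
    idx = np.arange(len(labels))
    p_gt = p[idx, labels]               # score assigned to the ground truth
    p_masked = p.copy()
    p_masked[idx, labels] = -np.inf     # exclude the ground truth...
    p_neg = p_masked.max(axis=1)        # ...to find the nearest negative
    gap = p_gt - p_neg
    return np.exp(-(gap ** 2) / (2 * sigma ** 2))

# Toy batch: one easy, one ambiguous, and one noisy-looking sample.
logits = np.array([[4.0, 0.0, 0.0],    # confident and correct -> easy
                   [1.0, 0.9, 0.0],    # near-tie              -> ambiguous
                   [0.0, 4.0, 0.0]])   # confident and wrong   -> noisy
labels = np.array([0, 0, 0])
print(naw_weights(logits, labels).round(3))  # -> [0.001 0.986 0.001]
```
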
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09050">arXiv:2502.09050</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09050">pdf</a>, <a href="https://arxiv.org/format/2502.09050">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Member-Group Relations via Multi-View Graph Filtering for Effective Group Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+C">Chae-Hyun Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yoon-Ryung Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jin-Duk Park</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+W">Won-Yong Shin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09050v1-abstract-short" style="display: inline;"> Group recommendation aims at providing optimized recommendations tailored to diverse groups, enabling groups to enjoy appropriate items. On the other hand, most existing group recommendation methods are built upon deep neural network (DNN) architectures designed to capture the intricate relationships between member-level and group-level interactions. While these DNN-based approaches have proven th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09050v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09050v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09050v1-abstract-full" style="display: none;"> Group recommendation aims at providing optimized recommendations tailored to diverse groups, enabling groups to enjoy appropriate items. On the other hand, most existing group recommendation methods are built upon deep neural network (DNN) architectures designed to capture the intricate relationships between member-level and group-level interactions. While these DNN-based approaches have proven their effectiveness, they require complex and expensive training procedures to incorporate group-level interactions in addition to member-level interactions. To overcome such limitations, we introduce Group-GF, a new approach for extremely fast recommendations of items to each group via multi-view graph filtering (GF) that offers a holistic view of complex member-group dynamics, without the need for costly model training. Specifically, in Group-GF, we first construct three item similarity graphs manifesting different viewpoints for GF. 
Then, we discover a distinct polynomial graph filter for each similarity graph and judiciously aggregate the three graph filters. Extensive experiments demonstrate the effectiveness of Group-GF in terms of significantly reducing runtime and achieving state-of-the-art recommendation accuracy.
Submitted 13 February, 2025; originally announced February 2025.
Comments: 5 pages, 3 figures, 4 tables; ACM Web Conference (WWW 2025) (to appear) (Please cite our conference version.)

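The abstract names Group-GF's ingredients (three item similarity graphs, a distinct polynomial graph filter per graph, an aggregation step) without formulas. The NumPy sketch below shows training-free polynomial graph filtering with those ingredients; the filter coefficients, view weights, and random toy graphs are assumptions made for illustration:

```python
import numpy as np

def polynomial_filter(S, coeffs):
    """Evaluate f(S) = sum_k coeffs[k] * S^k for a similarity matrix S."""
    out = np.zeros_like(S)
    P = np.eye(S.shape[0])
    for c in coeffs:
        out += c * P
        P = P @ S
    return out

def group_scores(r_group, graphs, filter_coeffs, view_weights):
    """Aggregate filtered scores across views; no model training involved."""
    score = np.zeros_like(r_group, dtype=float)
    for S, coeffs, a in zip(graphs, filter_coeffs, view_weights):
        score += a * (r_group @ polynomial_filter(S, coeffs))
    return score

# Toy setup: 4 items and three symmetric, normalized similarity views.
rng = np.random.default_rng(0)
graphs = []
for _ in range(3):
    A = rng.random((4, 4))
    A = (A + A.T) / 2                                  # symmetric similarity
    d = A.sum(axis=1)
    graphs.append(A / np.sqrt(np.outer(d, d)))         # degree normalization
r_group = np.array([1.0, 0.0, 1.0, 0.0])               # group interaction vector
coeffs = [(0.0, 1.0, 0.5), (0.0, 1.0), (0.0, 1.0)]     # assumed per-view filters
print(group_scores(r_group, graphs, coeffs, [0.5, 0.3, 0.2]).round(3))
```
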
3. arXiv:2502.06894 [pdf, other] (cs.CV, cs.AI)
AI-Driven HSI: Multimodality, Fusion, Challenges, and the Deep Learning Revolution
Authors: David S. Bhatti, Yougin Choi, Rahman S M Wahidur, Maleeka Bakhtawar, Sumin Kim, Surin Lee, Yongtae Lee, Heung-No Lee
Abstract: Hyperspectral imaging (HSI) captures spatial and spectral data, enabling analysis of features invisible to conventional systems. The technology is vital in fields such as weather monitoring, food quality control, counterfeit detection, and healthcare diagnostics, and it extends into defense, agriculture, and industrial automation. HSI has advanced with improvements in spectral resolution, miniaturization, and computational methods. This study provides an overview of HSI, its applications, challenges in data fusion, and the role of deep learning models in processing HSI data. We discuss how the integration of multimodal HSI with AI, particularly deep learning, improves classification accuracy and operational efficiency. Deep learning enhances HSI analysis in areas such as feature extraction, change detection, denoising, unmixing, dimensionality reduction, land-cover mapping, data augmentation, spectral reconstruction, and super-resolution. An emerging focus is the fusion of hyperspectral cameras with large language models (LLMs), referred to as highbrain LLMs, enabling the development of advanced applications such as low-visibility crash detection and face anti-spoofing. We also highlight key players in the HSI industry, its compound annual growth rate, and its growing industrial significance. The purpose is to offer insight to both technical and non-technical audiences, covering HSI images, trends, and future directions, while providing valuable information on HSI datasets and software libraries.
Submitted 9 February, 2025; originally announced February 2025.
Comments: 39 Pages, 22 figures, 20 tables
MSC Class: 68T07 (artificial neural networks and deep learning)

</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 Pages, 22 figures, 20 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 Artificial neural networks and deep learning </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04506">arXiv:2502.04506</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04506">pdf</a>, <a href="https://arxiv.org/format/2502.04506">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> When One LLM Drools, Multi-LLM Collaboration Rules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+S">Shangbin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+W">Wenxuan Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+A">Alisa Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zifeng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yike Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zejiang Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiaochuang Han</a>, <a href="/search/cs?searchtype=author&amp;query=Lang%2C+H">Hunter Lang</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chen-Yu Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Pfister%2C+T">Tomas Pfister</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvetkov%2C+Y">Yulia Tsvetkov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04506v1-abstract-short" style="display: inline;"> This position paper argues that in many realistic (i.e., complex, contextualized, subjective) scenarios, one LLM is not enough to produce a reliable output. We challenge the status quo of relying solely on a single general-purpose LLM and argue for multi-LLM collaboration to better represent the extensive diversity of data, skills, and people. We first posit that a single LLM underrepresents real-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04506v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04506v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04506v1-abstract-full" style="display: none;"> This position paper argues that in many realistic (i.e., complex, contextualized, subjective) scenarios, one LLM is not enough to produce a reliable output. We challenge the status quo of relying solely on a single general-purpose LLM and argue for multi-LLM collaboration to better represent the extensive diversity of data, skills, and people. We first posit that a single LLM underrepresents real-world data distributions, heterogeneous skills, and pluralistic populations, and that such representation gaps cannot be trivially patched by further training a single LLM. 
We then organize existing multi-LLM collaboration methods into a hierarchy based on the level of access and information exchange, ranging from API-level through text-level and logit-level to weight-level collaboration. Based on these methods, we highlight how multi-LLM collaboration addresses challenges that a single LLM struggles with, such as reliability, democratization, and pluralism. Finally, we identify the limitations of existing multi-LLM methods and motivate future work. We envision multi-LLM collaboration as an essential path toward compositional intelligence and collaborative AI development.
Submitted 6 February, 2025; originally announced February 2025.

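Among the four collaboration levels the paper names, logit-level collaboration is the most compact to make concrete: mix the models' next-token logits over a shared vocabulary before sampling. In the sketch below the "models" are stub logit vectors, and the vocabulary and 50/50 weights are invented; a real implementation would require models that share a tokenizer:

```python
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def logit_level_ensemble(logits_per_model, weights):
    """Logit-level collaboration: mix next-token logits, then normalize.

    This level assumes a shared vocabulary/tokenizer across models, the
    usual practical constraint distinguishing it from text-level methods.
    """
    mixed = sum(w * l for w, l in zip(weights, logits_per_model))
    return softmax(mixed)

# Toy shared vocabulary and two stub "models" with different strengths.
vocab = ["Paris", "London", "Rome", "Berlin"]
general_lm = np.array([2.0, 1.8, 0.5, 0.4])  # broad but uncertain
domain_lm = np.array([3.5, 0.2, 0.1, 0.1])   # specialized and confident
p = logit_level_ensemble([general_lm, domain_lm], weights=[0.5, 0.5])
print(vocab[int(np.argmax(p))], p.round(3))
```
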
5. arXiv:2501.01100 [pdf, other] (cs.AI, cs.CL, cs.LG)
ZebraLogic: On the Scaling Limits of LLMs for Logical Reasoning
Authors: Bill Yuchen Lin, Ronan Le Bras, Kyle Richardson, Ashish Sabharwal, Radha Poovendran, Peter Clark, Yejin Choi
Abstract: We investigate the logical reasoning capabilities of large language models (LLMs) and their scalability in complex non-monotonic reasoning. To this end, we introduce ZebraLogic, a comprehensive evaluation framework for assessing LLM reasoning performance on logic grid puzzles derived from constraint satisfaction problems (CSPs). ZebraLogic enables the generation of puzzles with controllable and quantifiable complexity, facilitating a systematic study of the scaling limits of models such as Llama, o1 models, and DeepSeek-R1. By encompassing a broad range of search space complexities and diverse logical constraints, ZebraLogic provides a structured environment to evaluate reasoning under increasing difficulty. Our results reveal a significant decline in accuracy as problem complexity grows -- a phenomenon we term the curse of complexity. This limitation persists even with larger models and increased inference-time computation, suggesting inherent constraints in current LLM reasoning capabilities. Additionally, we explore strategies to enhance logical reasoning, including Best-of-N sampling, backtracking mechanisms, and self-verification prompts. Our findings offer critical insights into the scalability of LLM reasoning, highlight fundamental limitations, and outline potential directions for improvement.
Submitted 3 February, 2025; originally announced February 2025.
Comments: Website: https://huggingface.co/spaces/WildEval/ZebraLogic

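The logic grid puzzles ZebraLogic builds on are ordinary constraint satisfaction problems, so a tiny instance can be enumerated directly. The clues below are invented for illustration (they are not from the benchmark); the point is that the search space, here 3! x 3! = 36 assignments, grows factorially with the grid dimensions, which is exactly the complexity knob the paper controls:

```python
from itertools import permutations

# A 3-house logic grid puzzle as a CSP: assign colors and pets to
# positions 0..2 so that every clue holds.
colors = ["red", "green", "blue"]
pets = ["cat", "dog", "fish"]

def satisfies(color_perm, pet_perm):
    c = {color: i for i, color in enumerate(color_perm)}  # color -> position
    p = {pet: i for i, pet in enumerate(pet_perm)}        # pet   -> position
    return (
        p["dog"] == c["red"]             # the dog lives in the red house
        and c["green"] == c["blue"] - 1  # green is directly left of blue
        and p["cat"] == 2                # the cat is in the last house
    )

solutions = [
    (cp, pp)
    for cp in permutations(colors)
    for pp in permutations(pets)
    if satisfies(cp, pp)
]
print(f"{len(solutions)} solution(s) in a space of 36")
for cp, pp in solutions:
    print(list(zip(cp, pp)))  # [('red', 'dog'), ('green', 'fish'), ('blue', 'cat')]
```
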
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website: https://huggingface.co/spaces/WildEval/ZebraLogic</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17805">arXiv:2501.17805</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17805">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> International AI Safety Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bengio%2C+Y">Yoshua Bengio</a>, <a href="/search/cs?searchtype=author&amp;query=Mindermann%2C+S">S枚ren Mindermann</a>, <a href="/search/cs?searchtype=author&amp;query=Privitera%2C+D">Daniel Privitera</a>, <a href="/search/cs?searchtype=author&amp;query=Besiroglu%2C+T">Tamay Besiroglu</a>, <a href="/search/cs?searchtype=author&amp;query=Bommasani%2C+R">Rishi Bommasani</a>, <a href="/search/cs?searchtype=author&amp;query=Casper%2C+S">Stephen Casper</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Fox%2C+P">Philip Fox</a>, <a href="/search/cs?searchtype=author&amp;query=Garfinkel%2C+B">Ben Garfinkel</a>, <a href="/search/cs?searchtype=author&amp;query=Goldfarb%2C+D">Danielle Goldfarb</a>, <a href="/search/cs?searchtype=author&amp;query=Heidari%2C+H">Hoda Heidari</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+A">Anson Ho</a>, <a href="/search/cs?searchtype=author&amp;query=Kapoor%2C+S">Sayash Kapoor</a>, <a href="/search/cs?searchtype=author&amp;query=Khalatbari%2C+L">Leila Khalatbari</a>, <a href="/search/cs?searchtype=author&amp;query=Longpre%2C+S">Shayne Longpre</a>, <a href="/search/cs?searchtype=author&amp;query=Manning%2C+S">Sam Manning</a>, <a href="/search/cs?searchtype=author&amp;query=Mavroudis%2C+V">Vasilios Mavroudis</a>, <a href="/search/cs?searchtype=author&amp;query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&amp;query=Michael%2C+J">Julian Michael</a>, <a href="/search/cs?searchtype=author&amp;query=Newman%2C+J">Jessica Newman</a>, <a href="/search/cs?searchtype=author&amp;query=Ng%2C+K+Y">Kwan Yee Ng</a>, <a href="/search/cs?searchtype=author&amp;query=Okolo%2C+C+T">Chinasa T. Okolo</a>, <a href="/search/cs?searchtype=author&amp;query=Raji%2C+D">Deborah Raji</a>, <a href="/search/cs?searchtype=author&amp;query=Sastry%2C+G">Girish Sastry</a>, <a href="/search/cs?searchtype=author&amp;query=Seger%2C+E">Elizabeth Seger</a> , et al. (71 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17805v1-abstract-short" style="display: inline;"> The first International AI Safety Report comprehensively synthesizes the current evidence on the capabilities, risks, and safety of advanced AI systems. The report was mandated by the nations attending the AI Safety Summit in Bletchley, UK. 
Thirty nations, the UN, the OECD, and the EU each nominated a representative to the report's Expert Advisory Panel. A total of 100 AI experts contributed, representing diverse perspectives and disciplines. Led by the report's Chair, these independent experts collectively had full discretion over the report's content.
Submitted 29 January, 2025; originally announced January 2025.

7. arXiv:2501.08292 [pdf, other] (cs.CL, cs.AI)
HALoGEN: Fantastic LLM Hallucinations and Where to Find Them
Authors: Abhilasha Ravichander, Shrusti Ghela, David Wadden, Yejin Choi
Abstract: Despite their impressive ability to generate high-quality and fluent text, generative large language models (LLMs) also produce hallucinations: statements that are misaligned with established world knowledge or provided input context. However, measuring hallucination can be challenging, as having humans verify model generations on-the-fly is both expensive and time-consuming. In this work, we release HALoGEN, a comprehensive hallucination benchmark consisting of: (1) 10,923 prompts for generative models spanning nine domains including programming, scientific attribution, and summarization, and (2) automatic high-precision verifiers for each use case that decompose LLM generations into atomic units and verify each unit against a high-quality knowledge source. We use this framework to evaluate ~150,000 generations from 14 language models, finding that even the best-performing models are riddled with hallucinations (sometimes up to 86% of generated atomic facts, depending on the domain). We further define a novel error classification for LLM hallucinations based on whether they likely stem from incorrect recollection of training data (Type A errors), incorrect knowledge in training data (Type B errors), or fabrication (Type C errors). We hope our framework provides a foundation for the principled study of why generative models hallucinate, and advances the development of trustworthy large language models.
Submitted 14 January, 2025; originally announced January 2025.
Comments: Preprint

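HALoGEN's decompose-then-verify recipe can be shown in miniature: split a generation into atomic units, check each unit against a trusted source, and report the unsupported fraction. Everything here is a deliberate simplification; the paper's verifiers are domain-specific and high-precision, not a sentence split against a toy set:

```python
# Stand-in for a high-quality knowledge source.
KNOWLEDGE = {
    "the eiffel tower is in paris",
    "water boils at 100 c at sea level",
}

def atomic_units(generation: str) -> list[str]:
    # Naive decomposition: one atomic claim per sentence.
    return [s.strip().lower() for s in generation.split(".") if s.strip()]

def hallucination_rate(generation: str) -> float:
    units = atomic_units(generation)
    supported = sum(u in KNOWLEDGE for u in units)
    return 1 - supported / len(units)

gen = ("The Eiffel Tower is in Paris. "
       "Water boils at 100 C at sea level. "
       "The Eiffel Tower was built in 1750.")
print(f"hallucination rate: {hallucination_rate(gen):.2f}")  # 1 of 3 -> 0.33
```
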
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07096">arXiv:2501.07096</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.07096">pdf</a>, <a href="https://arxiv.org/format/2501.07096">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Intent-Interest Disentanglement and Item-Aware Intent Contrastive Learning for Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yijin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Lim%2C+C">Chiehyeon Lim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07096v1-abstract-short" style="display: inline;"> Recommender systems aim to provide personalized item recommendations by capturing user behaviors derived from their interaction history. Considering that user interactions naturally occur sequentially based on users&#39; intents in mind, user behaviors can be interpreted as user intents. Therefore, intent-based sequential recommendations are actively studied recently to model user intents from histori&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07096v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07096v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07096v1-abstract-full" style="display: none;"> Recommender systems aim to provide personalized item recommendations by capturing user behaviors derived from their interaction history. Considering that user interactions naturally occur sequentially based on users&#39; intents in mind, user behaviors can be interpreted as user intents. Therefore, intent-based sequential recommendations are actively studied recently to model user intents from historical interactions for a more precise user understanding beyond traditional studies that often overlook the underlying semantics behind user interactions. However, existing studies face three challenges: 1) the limited understanding of user behaviors by focusing solely on intents, 2) the lack of robustness in categorizing intents due to arbitrary fixed numbers of intent categories, and 3) the neglect of interacted items in modeling of user intents. To address these challenges, we propose Intent-Interest Disentanglement and Item-Aware Intent Contrastive Learning for Sequential Recommendation (IDCLRec). IDCLRec disentangles user behaviors into intents which are dynamic motivations and interests which are stable tastes of users for a comprehensive understanding of user behaviors. A causal cross-attention mechanism is used to identify consistent interests across interactions, while residual behaviors are modeled as intents by modeling their temporal dynamics through a similarity adjustment loss. 
In addition, without predefining the number of intent categories, an importance-weighted attention mechanism captures user-specific categorical intents, considering the importance of the intent for each interaction. Furthermore, we introduce item-aware contrastive learning, which aligns intents that occur within the same interaction and aligns intents with the item combinations produced under the corresponding intent. Extensive experiments conducted on real-world datasets demonstrate the effectiveness of IDCLRec.
Submitted 13 January, 2025; originally announced January 2025.
Comments: 14 pages, 6 figures, 4 tables

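The item-aware contrastive objective is described only in terms of what is aligned with what. A generic InfoNCE-style alignment between intent vectors and matched item-combination embeddings is sketched below; the pairing scheme, temperature, and random toy vectors are all assumptions, not details from the paper:

```python
import numpy as np

def info_nce(anchors, positives, temperature=0.1):
    """Generic InfoNCE: align each anchor with its positive, contrasting
    against every other positive in the batch (positives on the diagonal)."""
    a = anchors / np.linalg.norm(anchors, axis=1, keepdims=True)
    p = positives / np.linalg.norm(positives, axis=1, keepdims=True)
    logits = a @ p.T / temperature
    logits -= logits.max(axis=1, keepdims=True)        # numerical stability
    log_prob = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
    return -np.mean(np.diag(log_prob))

rng = np.random.default_rng(0)
intents = rng.normal(size=(8, 16))                      # intent vector per interaction
item_combos = intents + 0.1 * rng.normal(size=(8, 16))  # matched item-combination embeddings
print(f"loss: {info_nce(intents, item_combos):.3f}")    # low: pairs already aligned
```
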
9. arXiv:2501.01347 [pdf, other] (cs.SD, cs.CL, eess.AS)
AdaptVC: High Quality Voice Conversion with Adaptive Learning
Authors: Jaehun Kim, Ji-Hoon Kim, Yeunju Choi, Tan Dat Nguyen, Seongkyu Mun, Joon Son Chung
Abstract: The goal of voice conversion is to transform the speech of a source speaker to sound like that of a reference speaker while preserving the original content. A key challenge is to extract disentangled linguistic content from the source and voice style from the reference. While existing approaches leverage various methods to isolate the two, generalization still requires further attention, especially for robustness in zero-shot scenarios. In this paper, we achieve successful disentanglement of content and speaker features by tuning self-supervised speech features with adapters. The adapters are trained to dynamically encode nuanced features from rich self-supervised features, and the decoder fuses them to produce speech that accurately resembles the reference with minimal loss of content. Moreover, we leverage a conditional flow matching decoder with cross-attention speaker conditioning to further boost the synthesis quality and efficiency. Subjective and objective evaluations in a zero-shot scenario demonstrate that the proposed method outperforms existing models in speech quality and similarity to the reference speech.
Submitted 14 January, 2025; v1 submitted 2 January, 2025; originally announced January 2025.
Comments: ICASSP 2025; demo available at https://mm.kaist.ac.kr/projects/AdaptVC

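The abstract says the adapters "dynamically encode nuanced features from rich self-supervised features" without specifying their architecture. One common lightweight pattern, sketched here as an assumption rather than the paper's design, is a learned softmax-weighted sum over the SSL model's layer outputs followed by a small residual bottleneck:

```python
import numpy as np

class LayerAdapter:
    """Hypothetical adapter over self-supervised speech features:
    softmax-weighted layer fusion plus a small residual bottleneck."""

    def __init__(self, n_layers, dim, bottleneck, rng):
        self.layer_logits = np.zeros(n_layers)            # learned layer weights
        self.down = rng.normal(0, 0.02, (dim, bottleneck))
        self.up = rng.normal(0, 0.02, (bottleneck, dim))

    def __call__(self, layer_feats):                      # (L, T, D)
        w = np.exp(self.layer_logits)
        w /= w.sum()                                      # softmax over layers
        fused = np.tensordot(w, layer_feats, axes=1)      # (T, D)
        return fused + np.maximum(fused @ self.down, 0) @ self.up  # residual MLP

rng = np.random.default_rng(0)
feats = rng.normal(size=(12, 50, 256))  # 12 SSL layers, 50 frames, dim 256
adapter = LayerAdapter(n_layers=12, dim=256, bottleneck=32, rng=rng)
print(adapter(feats).shape)             # (50, 256): one fused feature per frame
```
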
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Park%2C+M">Mose Park</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yunjin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Jeon%2C+J">Jong-June Jeon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01273v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have recently emerged, attracting considerable attention due to their ability to generate highly natural, human-like text. This study compares the latent community structures of LLM-generated text and human-written text within a hypothesis testing procedure. Specifically, we analyze three text sets: original human-written texts ($\mathcal{O}$), their LLM-paraphrased ve&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01273v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01273v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01273v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have recently emerged, attracting considerable attention due to their ability to generate highly natural, human-like text. This study compares the latent community structures of LLM-generated text and human-written text within a hypothesis testing procedure. Specifically, we analyze three text sets: original human-written texts ($\mathcal{O}$), their LLM-paraphrased versions ($\mathcal{G}$), and a twice-paraphrased set ($\mathcal{S}$) derived from $\mathcal{G}$. Our analysis addresses two key questions: (1) Is the difference in latent community structures between $\mathcal{O}$ and $\mathcal{G}$ the same as that between $\mathcal{G}$ and $\mathcal{S}$? (2) Does $\mathcal{G}$ become more similar to $\mathcal{O}$ as the LLM parameter controlling text variability is adjusted? The first question is based on the assumption that if LLM-generated text truly resembles human language, then the gap between the pair ($\mathcal{O}$, $\mathcal{G}$) should be similar to that between the pair ($\mathcal{G}$, $\mathcal{S}$), as both pairs consist of an original text and its paraphrase. The second question examines whether the degree of similarity between LLM-generated and human text varies with changes in the breadth of text generation. To address these questions, we propose a statistical hypothesis testing framework that leverages the fact that each text has corresponding parts across all datasets due to their paraphrasing relationship. This relationship enables the mapping of one dataset&#39;s relative position to another, allowing two datasets to be mapped to a third dataset. As a result, both mapped datasets can be quantified with respect to the space characterized by the third dataset, facilitating a direct comparison between them. Our results indicate that GPT-generated text remains distinct from human-authored text. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01273v1-abstract-full').style.display = 'none'; document.getElementById('2501.01273v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01182">arXiv:2501.01182</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.01182">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RingFormer: A Neural Vocoder with Ring Attention and Convolution-Augmented Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hong%2C+S">Seongho Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yong-Hoon Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01182v1-abstract-short" style="display: inline;"> While transformers demonstrate outstanding performance across various audio tasks, their application to neural vocoders remains challenging. Neural vocoders require the generation of long audio signals at the sample level, which demands high temporal resolution. This results in significant computational costs for attention map generation and limits their ability to efficiently process both global&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01182v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01182v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01182v1-abstract-full" style="display: none;"> While transformers demonstrate outstanding performance across various audio tasks, their application to neural vocoders remains challenging. Neural vocoders require the generation of long audio signals at the sample level, which demands high temporal resolution. This results in significant computational costs for attention map generation and limits their ability to efficiently process both global and local information. Additionally, the sequential nature of sample generation in neural vocoders poses difficulties for real-time processing, making the direct adoption of transformers impractical. To address these challenges, we propose RingFormer, a neural vocoder that incorporates the ring attention mechanism into a lightweight transformer variant, the convolution-augmented transformer (Conformer). Ring attention effectively captures local details while integrating global information, making it well-suited for processing long sequences and enabling real-time audio generation. RingFormer is trained using adversarial training with two discriminators. 

arXiv:2412.19198 (https://arxiv.org/abs/2412.19198) [pdf, other]
Subjects: cs.AI (Artificial Intelligence)
Multi-Attribute Constraint Satisfaction via Language Model Rewriting
Authors: Ashutosh Baheti, Debanjana Chakraborty, Faeze Brahman, Ronan Le Bras, Ximing Lu, Nouha Dziri, Yejin Choi, Mark Riedl, Maarten Sap
Abstract: Obeying precise constraints on top of multiple external attributes is a common computational problem underlying seemingly different domains, from controlled text generation to protein engineering. Existing language model (LM) controllability methods for multi-attribute constraint satisfaction often rely on specialized architectures or gradient-based classifiers, limiting their flexibility to work with arbitrary black-box evaluators and pretrained models. Current general-purpose large language models, while capable, cannot achieve fine-grained multi-attribute control over external attributes. Thus, we create Multi-Attribute Constraint Satisfaction (MACS), a generalized method capable of finetuning language models on any sequential domain to satisfy user-specified constraints on multiple external real-value attributes. Our method trains LMs as editors by sampling diverse multi-attribute edit pairs from an initial set of paraphrased outputs. During inference, the LM iteratively improves upon its previous solution to satisfy constraints for all attributes by leveraging our designed constraint satisfaction reward. We additionally experiment with reward-weighted behavior cloning to further improve the constraint satisfaction rate of LMs. To evaluate our approach, we present a new Fine-grained Constraint Satisfaction (FineCS) benchmark, featuring two challenging tasks: (1) Text Style Transfer, where the goal is to simultaneously modify the sentiment and complexity of reviews, and (2) Protein Design, focusing on modulating fluorescence and stability of Green Fluorescent Proteins (GFP). Our empirical results show that MACS achieves the highest threshold satisfaction in both FineCS tasks, outperforming strong domain-specific baselines. Our work opens new avenues for generalized and real-value multi-attribute control, with implications for diverse applications spanning NLP and bioinformatics.
Submitted 26 December, 2024; originally announced December 2024.
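
A minimal sketch of the inference-time loop the abstract describes (function names and the reward shape are our assumptions, not the MACS API): an editor rewrites its previous output until black-box attribute evaluators land inside the user's constraint intervals.

```python
# Iterative editing against black-box attribute evaluators.
def constraint_reward(text, evaluators, targets):
    # targets: {attr: (lo, hi)}; evaluators: {attr: text -> float}
    vals = {a: f(text) for a, f in evaluators.items()}
    return sum(lo <= vals[a] <= hi for a, (lo, hi) in targets.items()), vals

def iterative_edit(editor, text, evaluators, targets, max_steps=8):
    best, best_r = text, constraint_reward(text, evaluators, targets)[0]
    cand = text
    for _ in range(max_steps):
        if best_r == len(targets):          # all constraints satisfied
            break
        cand = editor(cand, targets)        # propose an edited candidate
        r, _ = constraint_reward(cand, evaluators, targets)
        if r > best_r:
            best, best_r = cand, r
    return best

# Toy usage: "sentiment" counts exclamation marks; editor appends one per step.
out = iterative_edit(lambda t, _: t + "!",
                     "great movie",
                     {"sentiment": lambda t: t.count("!")},
                     {"sentiment": (2, 5)})
print(out)   # 'great movie!!'
```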

arXiv:2412.12175 (https://arxiv.org/abs/2412.12175) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
Explore Theory of Mind: Program-guided adversarial data generation for theory of mind reasoning
Authors: Melanie Sclar, Jane Yu, Maryam Fazel-Zarandi, Yulia Tsvetkov, Yonatan Bisk, Yejin Choi, Asli Celikyilmaz
Abstract: Do large language models (LLMs) have theory of mind? A plethora of papers and benchmarks have been introduced to evaluate if current models have been able to develop this key ability of social intelligence. However, all rely on limited datasets with simple patterns that can potentially lead to problematic blind spots in evaluation and an overestimation of model capabilities. We introduce ExploreToM, the first framework to allow large-scale generation of diverse and challenging theory of mind data for robust training and evaluation. Our approach leverages an A* search over a custom domain-specific language to produce complex story structures and novel, diverse, yet plausible scenarios to stress test the limits of LLMs. Our evaluation reveals that state-of-the-art LLMs, such as Llama-3.1-70B and GPT-4o, show accuracies as low as 0% and 9% on ExploreToM-generated data, highlighting the need for more robust theory of mind evaluation. As our generations are a conceptual superset of prior work, fine-tuning on our data yields a 27-point accuracy improvement on the classic ToMi benchmark (Le et al., 2019). ExploreToM also enables uncovering underlying skills and factors missing for models to show theory of mind, such as unreliable state tracking or data imbalances, which may contribute to models' poor performance on benchmarks.
Submitted 12 December, 2024; originally announced December 2024.
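
A toy illustration of "A* search over a domain-specific language" for story construction (the action set and heuristic below are ours, far simpler than ExploreToM's): states are action sequences, and the search prefers stories that set up false-belief situations.

```python
# A* over a tiny story DSL; heuristic rewards moving an object while an
# agent is absent (the classic false-belief setup).
import heapq

ACTIONS = ["enter(A)", "leave(A)", "move_obj(A)", "enter(B)", "move_obj(B)"]

def heuristic(story):
    score, absent = 0, set()
    for act in story:
        if act.startswith("leave"):
            absent.add(act[6])
        elif act.startswith("enter"):
            absent.discard(act[6])
        elif act.startswith("move_obj") and absent:
            score += 1
    return -score                      # lower is better for heapq

def astar(max_len=4):
    frontier = [(0, ())]
    while frontier:
        f, story = heapq.heappop(frontier)
        if len(story) == max_len:
            return story
        for a in ACTIONS:
            nxt = story + (a,)
            heapq.heappush(frontier, (len(nxt) + heuristic(nxt), nxt))
    return ()

print(astar())
```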

arXiv:2412.11480 (https://arxiv.org/abs/2412.11480) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)
Data-driven Precipitation Nowcasting Using Satellite Imagery
Authors: Young-Jae Park, Doyi Kim, Minseok Seo, Hae-Gon Jeon, Yeji Choi
Abstract: Accurate precipitation forecasting is crucial for early warnings of disasters, such as floods and landslides. Traditional forecasts rely on ground-based radar systems, which are space-constrained and have high maintenance costs. Consequently, most developing countries depend on a global numerical model with low resolution, instead of operating their own radar systems. To mitigate this gap, we propose the Neural Precipitation Model (NPM), which uses global-scale geostationary satellite imagery. NPM predicts precipitation for up to six hours, with an update every hour. As input, we take three key channels that discriminate rain clouds: infrared radiation (at a wavelength of 10.5 μm) and the upper- (6.3 μm) and lower-level (7.3 μm) water vapor channels. Additionally, NPM introduces positional encoders to capture seasonal and temporal patterns, accounting for variations in precipitation. Our experimental results demonstrate that NPM can predict rainfall in real-time with a resolution of 2 km. The code and dataset are available at https://github.com/seominseok0429/Data-driven-Precipitation-Nowcasting-Using-Satellite-Imagery.
Submitted 16 December, 2024; originally announced December 2024.
Comments: Accepted by AAAI2025
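
A sketch of how such an input tensor might be assembled (our construction, not the NPM code): stack the three channels and append sinusoidal encodings of hour-of-day and day-of-year so a model can pick up diurnal and seasonal cycles.

```python
# Build a (channels, H, W) input from three satellite channels plus time encodings.
import numpy as np

def build_input(ir_105, wv_63, wv_73, hour, doy):
    """Each channel: (H, W) image; hour in [0, 24), doy in [1, 366]."""
    H, W = ir_105.shape
    t_feats = [np.sin(2 * np.pi * hour / 24), np.cos(2 * np.pi * hour / 24),
               np.sin(2 * np.pi * doy / 365.25), np.cos(2 * np.pi * doy / 365.25)]
    time_planes = [np.full((H, W), v, dtype=np.float32) for v in t_feats]
    return np.stack([ir_105, wv_63, wv_73, *time_planes])   # (7, H, W)

x = build_input(*(np.random.rand(64, 64).astype(np.float32) for _ in range(3)),
                hour=14.5, doy=172)
print(x.shape)   # (7, 64, 64)
```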

arXiv:2412.09842 (https://arxiv.org/abs/2412.09842) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.CV (Computer Vision and Pattern Recognition)
Leveraging Programmatically Generated Synthetic Data for Differentially Private Diffusion Training
Authors: Yujin Choi, Jinseong Park, Junyoung Byun, Jaewook Lee
Abstract: Programmatically generated synthetic data has been used in differentially private training for classification to enhance performance without privacy leakage. However, as the synthetic data is generated from a random process, the distributions of real and synthetic data are distinguishable and difficult to transfer. Therefore, a model trained with the synthetic data generates unrealistic random images, raising challenges for adapting the synthetic data to generative models. In this work, we propose DP-SynGen, which leverages programmatically generated synthetic data in diffusion models to address this challenge. By exploiting the three stages of diffusion models (coarse, context, and cleaning), we identify stages where synthetic data can be effectively utilized. We theoretically and empirically verify that the cleaning and coarse stages can be trained without private data, replacing them with synthetic data to reduce the privacy budget. The experimental results show that DP-SynGen improves the quality of generative data by mitigating the negative impact of privacy-induced noise on the generation process.
Submitted 12 December, 2024; originally announced December 2024.
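
A sketch of the stage split under our reading of the abstract (the timestep boundaries and the mapping of "coarse" to high-noise and "cleaning" to low-noise timesteps are assumptions): only the middle "context" stage touches private data and spends privacy budget.

```python
# Route diffusion training steps: synthetic data for coarse/cleaning timesteps,
# private data (with DP-SGD) only for the context stage.
import random

T = 1000
COARSE, CLEANING = range(800, 1000), range(0, 100)   # assumed boundaries

def sample_training_step(private_batch, synthetic_batch):
    t = random.randrange(T)
    if t in COARSE or t in CLEANING:
        return t, synthetic_batch, "no_dp"   # synthetic only: no privacy cost
    return t, private_batch, "dp_sgd"        # spends privacy budget

t, batch, mode = sample_training_step(["priv"], ["syn"])
print(t, batch, mode)
```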

arXiv:2412.06966 (https://arxiv.org/abs/2412.06966) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CY (Computers and Society)
Machine Unlearning Doesn't Do What You Think: Lessons for Generative AI Policy, Research, and Practice
Authors: A. Feder Cooper, Christopher A. Choquette-Choo, Miranda Bogen, Matthew Jagielski, Katja Filippova, Ken Ziyu Liu, Alexandra Chouldechova, Jamie Hayes, Yangsibo Huang, Niloofar Mireshghallah, Ilia Shumailov, Eleni Triantafillou, Peter Kairouz, Nicole Mitchell, Percy Liang, Daniel E. Ho, Yejin Choi, Sanmi Koyejo, Fernando Delgado, James Grimmelmann, Vitaly Shmatikov, Christopher De Sa, Solon Barocas, Amy Cyphert, Mark Lemley, et al. (10 additional authors not shown)
Abstract: We articulate fundamental mismatches between technical methods for machine unlearning in Generative AI, and documented aspirations for broader impact that these methods could have for law and policy. These aspirations are both numerous and varied, motivated by issues that pertain to privacy, copyright, safety, and more. For example, unlearning is often invoked as a solution for removing the effects of targeted information from a generative-AI model's parameters, e.g., a particular individual's personal data or in-copyright expression of Spiderman that was included in the model's training data. Unlearning is also proposed as a way to prevent a model from generating targeted types of information in its outputs, e.g., generations that closely resemble a particular individual's data or reflect the concept of "Spiderman." Both of these goals--the targeted removal of information from a model and the targeted suppression of information from a model's outputs--present various technical and substantive challenges. We provide a framework for thinking rigorously about these challenges, which enables us to be clear about why unlearning is not a general-purpose solution for circumscribing generative-AI model behavior in service of broader positive impact. We aim for conceptual clarity and to encourage more thoughtful communication among machine learning (ML), law, and policy experts who seek to develop and apply technical methods for compliance with policy objectives.
Submitted 9 December, 2024; originally announced December 2024.
Comments: Presented at the 2nd Workshop on Generative AI and Law at ICML (July 2024)

arXiv:2412.05481 (https://arxiv.org/abs/2412.05481) [pdf, ps, other]
Subjects: cs.AI (Artificial Intelligence); cs.LG (Machine Learning); cs.LO (Logic in Computer Science); stat.ML (Machine Learning)
A Compositional Atlas for Algebraic Circuits
Authors: Benjie Wang, Denis Deratani Mauá, Guy Van den Broeck, YooJung Choi
Abstract: Circuits based on sum-product structure have become a ubiquitous representation to compactly encode knowledge, from Boolean functions to probability distributions. By imposing constraints on the structure of such circuits, certain inference queries become tractable, such as model counting and most probable configuration. Recent works have explored analyzing probabilistic and causal inference queries as compositions of basic operators to derive tractability conditions. In this paper, we take an algebraic perspective for compositional inference, and show that a large class of queries - including marginal MAP, probabilistic answer set programming inference, and causal backdoor adjustment - correspond to a combination of basic operators over semirings: aggregation, product, and elementwise mapping. Using this framework, we uncover simple and general sufficient conditions for tractable composition of these operators, in terms of circuit properties (e.g., marginal determinism, compatibility) and conditions on the elementwise mappings. Applying our analysis, we derive novel tractability conditions for many such compositional queries. Our results unify tractability conditions for existing problems on circuits, while providing a blueprint for analysing novel compositional inference queries.
Submitted 6 December, 2024; originally announced December 2024.
Comments: NeurIPS 2024
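
The semiring view is easy to make concrete: the same circuit evaluated under (sum, product) computes a marginal, while under (max, product) it computes the value of the most probable configuration. A minimal sketch (our toy circuit, not the paper's formalism):

```python
# Evaluate a tiny sum-product circuit under two semirings.
import operator
from dataclasses import dataclass

@dataclass
class Node:
    op: str                  # 'leaf', 'agg' (aggregation), or 'prod' (product)
    children: tuple = ()
    value: float = 0.0

def evaluate(node, aggregate, product, one):
    if node.op == "leaf":
        return node.value
    vals = [evaluate(c, aggregate, product, one) for c in node.children]
    if node.op == "prod":
        out = one
        for v in vals:
            out = product(out, v)
        return out
    out = vals[0]
    for v in vals[1:]:
        out = aggregate(out, v)
    return out

leaf = lambda v: Node("leaf", value=v)
circuit = Node("agg", (Node("prod", (leaf(0.3), leaf(0.9))),
                       Node("prod", (leaf(0.7), leaf(0.2)))))
print(evaluate(circuit, operator.add, operator.mul, 1.0))  # 0.41 (marginal)
print(evaluate(circuit, max, operator.mul, 1.0))           # 0.27 (max-product)
```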

arXiv:2412.05282 (https://arxiv.org/abs/2412.05282) [pdf]
Subjects: cs.CY (Computers and Society); cs.AI (Artificial Intelligence)
International Scientific Report on the Safety of Advanced AI (Interim Report)
Authors: Yoshua Bengio, Sören Mindermann, Daniel Privitera, Tamay Besiroglu, Rishi Bommasani, Stephen Casper, Yejin Choi, Danielle Goldfarb, Hoda Heidari, Leila Khalatbari, Shayne Longpre, Vasilios Mavroudis, Mantas Mazeika, Kwan Yee Ng, Chinasa T. Okolo, Deborah Raji, Theodora Skeadas, Florian Tramèr, Bayo Adekanmbi, Paul Christiano, David Dalrymple, Thomas G. Dietterich, Edward Felten, Pascale Fung, Pierre-Olivier Gourinchas, et al. (19 additional authors not shown)
Abstract: This is the interim publication of the first International Scientific Report on the Safety of Advanced AI. The report synthesises the scientific understanding of general-purpose AI -- AI that can perform a wide variety of tasks -- with a focus on understanding and managing its risks. A diverse group of 75 AI experts contributed to this report, including an international Expert Advisory Panel nominated by 30 countries, the EU, and the UN. Led by the Chair, these independent experts collectively had full discretion over the report's content.
Submitted 5 November, 2024; originally announced December 2024.
Comments: Available under the open government license at https://www.gov.uk/government/publications/international-scientific-report-on-the-safety-of-advanced-ai

arXiv:2412.05233 (https://arxiv.org/abs/2412.05233) [pdf, other]
Subjects: math.NA (Numerical Analysis); cs.LG (Machine Learning)
Physics-informed reduced order model with conditional neural fields
Authors: Minji Kim, Tianshu Wen, Kookjin Lee, Youngsoo Choi
Abstract: This study presents the conditional neural fields for reduced-order modeling (CNF-ROM) framework to approximate solutions of parametrized partial differential equations (PDEs). The approach combines a parametric neural ODE (PNODE) for modeling latent dynamics over time with a decoder that reconstructs PDE solutions from the corresponding latent states. We introduce a physics-informed learning objective for CNF-ROM, which includes two key components. First, the framework uses coordinate-based neural networks to calculate and minimize PDE residuals by computing spatial derivatives via automatic differentiation and applying the chain rule for time derivatives. Second, exact initial and boundary conditions (IC/BC) are imposed using approximate distance functions (ADFs) [Sukumar and Srivastava, CMAME, 2022]. However, ADFs introduce a trade-off as their second- or higher-order derivatives become unstable at the joining points of boundaries. To address this, we introduce an auxiliary network inspired by [Gladstone et al., NeurIPS ML4PS workshop, 2022]. Our method is validated through parameter extrapolation and interpolation, temporal extrapolation, and comparisons with analytical solutions.
Submitted 6 December, 2024; originally announced December 2024.
Comments: 7 pages, 2 figures, NeurIPS 2024 Workshop on Machine Learning and the Physical Sciences
Report number: LLNL-CONF-869137
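
The two ingredients above, autodiff PDE residuals and exact Dirichlet conditions via a distance function, fit in a few lines. A minimal sketch (ours, not CNF-ROM; the ADF here is the simple product x(1-x) for the unit interval, and the PDE is the 1D heat equation):

```python
# PDE residual for u_t = nu * u_xx on x in [0, 1], with u(0)=u(1)=0 imposed
# exactly by multiplying the network output by an approximate distance function.
import torch

net = torch.nn.Sequential(torch.nn.Linear(2, 32), torch.nn.Tanh(),
                          torch.nn.Linear(32, 1))

def u(xt):                                 # xt: (N, 2) columns (x, t)
    adf = xt[:, :1] * (1.0 - xt[:, :1])    # vanishes on the boundary -> exact BC
    return adf * net(xt)

def residual(xt, nu=0.1):
    xt = xt.requires_grad_(True)
    out = u(xt)
    g = torch.autograd.grad(out.sum(), xt, create_graph=True)[0]
    u_x, u_t = g[:, :1], g[:, 1:]
    u_xx = torch.autograd.grad(u_x.sum(), xt, create_graph=True)[0][:, :1]
    return u_t - nu * u_xx                 # train by minimizing residual**2

print(residual(torch.rand(8, 2)).shape)    # torch.Size([8, 1])
```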

arXiv:2412.04577 (https://arxiv.org/abs/2412.04577) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.CE (Computational Engineering, Finance, and Science)
Data-Driven, Parameterized Reduced-order Models for Predicting Distortion in Metal 3D Printing
Authors: Indu Kant Deo, Youngsoo Choi, Saad A. Khairallah, Alexandre Reikher, Maria Strantza
Abstract: In Laser Powder Bed Fusion (LPBF), the applied laser energy produces high thermal gradients that lead to unacceptable final part distortion. Accurate distortion prediction is essential for optimizing the 3D printing process and manufacturing a part that meets geometric accuracy requirements. This study introduces data-driven parameterized reduced-order models (ROMs) to predict distortion in LPBF across various machine process settings. We propose a ROM framework that combines Proper Orthogonal Decomposition (POD) with Gaussian Process Regression (GPR) and compare its performance against a deep-learning based parameterized graph convolutional autoencoder (GCA). The POD-GPR model demonstrates high accuracy, predicting distortions within ±0.001 mm, and delivers a computational speed-up of approximately 1800x.
Submitted 5 December, 2024; originally announced December 2024.
Comments: 7 pages, 4 figures, NeurIPS Machine Learning for Physical Sciences workshop
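
The POD-GPR pipeline is compact enough to sketch end to end (toy snapshot data of our own making, not the paper's LPBF solver): SVD of the snapshot matrix gives modes, GPR maps process parameters to modal coefficients, and the field is reconstructed from the predicted coefficients.

```python
# POD + Gaussian Process Regression on modal coefficients.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.default_rng(0)
params = rng.uniform(size=(30, 2))                  # e.g., laser power, speed
snapshots = np.stack([np.sin(np.linspace(0, 3, 500) * (1 + p[0])) * p[1]
                      for p in params])             # (30, 500) toy "distortions"

U, s, Vt = np.linalg.svd(snapshots - snapshots.mean(0), full_matrices=False)
r = 5                                               # retained POD modes
coeffs = U[:, :r] * s[:r]                           # modal coefficients (30, r)

gpr = GaussianProcessRegressor().fit(params, coeffs)
new_coeffs = gpr.predict(rng.uniform(size=(1, 2)))  # coefficients for new params
field = snapshots.mean(0) + (new_coeffs @ Vt[:r])[0]
print(field.shape)                                  # (500,) reconstructed field
```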

arXiv:2412.03922 (https://arxiv.org/abs/2412.03922) [pdf]
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
DOI: 10.1007/978-3-031-72114-4_21 (https://doi.org/10.1007/978-3-031-72114-4_21)
Deformation-Aware Segmentation Network Robust to Motion Artifacts for Brain Tissue Segmentation using Disentanglement Learning
Authors: Sunyoung Jung, Yoonseok Choi, Mohammed A. Al-masni, Minyoung Jung, Dong-Hyun Kim
Abstract: Motion artifacts caused by prolonged acquisition time are a significant challenge in Magnetic Resonance Imaging (MRI), hindering accurate tissue segmentation. These artifacts appear as blurred images that mimic tissue-like appearances, making segmentation difficult. This study proposes a novel deep learning framework that demonstrates superior performance in both motion correction and robust brain tissue segmentation in the presence of artifacts. The core concept lies in a complementary process: a disentanglement learning network progressively removes artifacts, leading to cleaner images and consequently, more accurate segmentation by a jointly trained motion estimation and segmentation network. This network generates three outputs: a motion-corrected image, a motion deformation map that identifies artifact-affected regions, and a brain tissue segmentation mask. This deformation serves as a guidance mechanism for the disentanglement process, aiding the model in recovering lost information or removing artificial structures introduced by the artifacts. Extensive in-vivo experiments on pediatric motion data demonstrate that our proposed framework outperforms state-of-the-art methods in segmenting motion-corrupted MRI scans.
Submitted 5 December, 2024; originally announced December 2024.
Comments: Medical Image Computing and Computer Assisted Intervention, MICCAI 2024
Journal ref: Medical Image Computing and Computer Assisted Intervention, MICCAI 2024. Lecture Notes in Computer Science, vol 15009. Springer, Cham

arXiv:2412.03784 (https://arxiv.org/abs/2412.03784) [pdf, other]
Subjects: cs.SD (Sound); cs.AI (Artificial Intelligence); eess.AS (Audio and Speech Processing)
Speech Recognition-based Feature Extraction for Enhanced Automatic Severity Classification in Dysarthric Speech
Authors: Yerin Choi, Jeehyun Lee, Myoung-Wan Koo
Abstract: Due to the subjective nature of current clinical evaluation, the need for automatic severity evaluation in dysarthric speech has emerged. DNN models outperform ML models but lack user-friendly explainability. ML models offer explainable results at a feature level, but their performance is comparatively lower. Current ML models extract various features from raw waveforms to predict severity. However, existing methods do not encompass all dysarthric features used in clinical evaluation. To address this gap, we propose a feature extraction method that minimizes information loss. We introduce ASR transcription as a novel feature extraction source. We finetune the ASR model for dysarthric speech, then use this model to transcribe dysarthric speech and extract word segment boundary information. This enables capturing finer pronunciation and broader prosodic features. These features improved severity prediction performance over existing features, achieving a balanced accuracy of 83.72%.
Submitted 4 December, 2024; originally announced December 2024.
Comments: Accepted to SLT 2024
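
A sketch of turning word-segment boundaries into features (the specific statistics are our choice, not the paper's feature set): word durations and inter-word pauses proxy pronunciation speed and prosody.

```python
# Duration/pause statistics from ASR word-segment boundaries.
import numpy as np

def boundary_features(segments):
    """segments: list of (word, start_sec, end_sec) from a finetuned ASR model."""
    durs = np.array([e - s for _, s, e in segments])
    pauses = np.array([segments[i + 1][1] - segments[i][2]
                       for i in range(len(segments) - 1)] or [0.0])
    return {
        "speech_rate": len(segments) / (segments[-1][2] - segments[0][1]),
        "mean_word_dur": durs.mean(), "std_word_dur": durs.std(),
        "mean_pause": pauses.mean(), "max_pause": pauses.max(),
    }

print(boundary_features([("the", 0.0, 0.4), ("cat", 0.7, 1.5), ("sat", 1.9, 2.6)]))
```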

arXiv:2412.01339 (https://arxiv.org/abs/2412.01339) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.GR (Graphics); cs.LG (Machine Learning); stat.ML (Machine Learning)
Negative Token Merging: Image-based Adversarial Feature Guidance
Authors: Jaskirat Singh, Lindsey Li, Weijia Shi, Ranjay Krishna, Yejin Choi, Pang Wei Koh, Michael F. Cohen, Stephen Gould, Liang Zheng, Luke Zettlemoyer
Abstract: Text-based adversarial guidance using a negative prompt has emerged as a widely adopted approach to steer diffusion models away from producing undesired concepts. While useful, performing adversarial guidance using text alone can be insufficient to capture complex visual concepts or avoid specific visual elements like copyrighted characters. In this paper, for the first time we explore an alternate modality in this direction by performing adversarial guidance directly using visual features from a reference image or other images in a batch. We introduce negative token merging (NegToMe), a simple but effective training-free approach which performs adversarial guidance through images by selectively pushing apart matching visual features between reference and generated images during the reverse diffusion process. By simply adjusting the used reference, NegToMe enables a diverse range of applications. Notably, when using other images in the same batch as reference, we find that NegToMe significantly enhances output diversity (e.g., racial, gender, visual) by guiding features of each image away from others. Similarly, when used w.r.t. copyrighted reference images, NegToMe reduces visual similarity to copyrighted content by 34.57%. NegToMe is simple to implement using just a few lines of code, adds only marginally higher (<4%) inference time, and is compatible with different diffusion architectures, including those like Flux, which don't natively support the use of a negative prompt. Code is available at https://negtome.github.io
Submitted 5 December, 2024; v1 submitted 2 December, 2024; originally announced December 2024.
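
The push-apart step is simple to picture. A minimal sketch (ours; the real implementation is at the project page): match each generated token to its most similar reference token by cosine similarity, then nudge it away from that match, weighting by match strength.

```python
# One NegToMe-style push-apart step on token features.
import numpy as np

def neg_tome_step(gen, ref, alpha=0.1):
    """gen: (N, d) generated-image tokens, ref: (M, d) reference tokens."""
    gn = gen / np.linalg.norm(gen, axis=1, keepdims=True)
    rn = ref / np.linalg.norm(ref, axis=1, keepdims=True)
    sim = gn @ rn.T                           # (N, M) cosine similarities
    nearest = ref[sim.argmax(axis=1)]         # best-matching reference token
    w = sim.max(axis=1, keepdims=True)        # push harder on closer matches
    return gen - alpha * w * (nearest - gen)  # move away from the match

gen, ref = np.random.randn(16, 8), np.random.randn(4, 8)
print(neg_tome_step(gen, ref).shape)          # (16, 8)
```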

arXiv:2411.19114 (https://arxiv.org/abs/2411.19114) [pdf, other]
Subjects: cs.DC (Distributed, Parallel, and Cluster Computing); cs.AI (Artificial Intelligence); cs.AR (Hardware Architecture); cs.LG (Machine Learning)
PREBA: A Hardware/Software Co-Design for Multi-Instance GPU based AI Inference Servers
Authors: Gwangoo Yeo, Jiin Kim, Yujeong Choi, Minsoo Rhu
Abstract: NVIDIA's Multi-Instance GPU (MIG) is a feature that enables system designers to reconfigure one large GPU into multiple smaller GPU slices. This work characterizes this emerging GPU and evaluates its effectiveness in designing high-performance AI inference servers. Our study reveals that the data preprocessing stage of AI inference causes significant performance bottlenecks for MIG. To this end, we present PREBA, a hardware/software co-design targeting MIG inference servers. Our first proposition is an FPGA-based data preprocessing accelerator that unlocks the full potential of MIG with domain-specific acceleration of data preprocessing. The MIG inference server, unleashed from preprocessing overheads, is then augmented with our dynamic batching system that enables high-performance inference. PREBA is implemented end-to-end in real systems, providing a 3.7x improvement in throughput, 3.4x reduction in tail latency, 3.5x improvement in energy-efficiency, and 3.0x improvement in cost-efficiency.
Submitted 28 November, 2024; originally announced November 2024.
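
A sketch of the dynamic-batching idea in its generic form (ours, not PREBA's system): accumulate requests until a size cap or a latency deadline is hit, trading a small wait for better utilization of each GPU slice.

```python
# Generic deadline-based dynamic batcher.
import time
from queue import Queue, Empty

def dynamic_batches(requests: Queue, max_batch=8, max_wait_s=0.005):
    while True:
        batch, deadline = [], time.monotonic() + max_wait_s
        while len(batch) < max_batch:
            try:
                remaining = max(0.0, deadline - time.monotonic())
                batch.append(requests.get(timeout=remaining))
            except Empty:
                break
        if batch:
            yield batch                     # hand off to an inference slice

q = Queue()
for i in range(10):
    q.put(f"req{i}")
gen = dynamic_batches(q)
print(next(gen), next(gen))                 # ['req0'..'req7'] then ['req8','req9']
```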
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16801">arXiv:2411.16801</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.16801">pdf</a>, <a href="https://arxiv.org/format/2411.16801">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Controllable Human Image Generation with Personalized Multi-Garments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yisol Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kwak%2C+S">Sangkyung Kwak</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+S">Sihyun Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+H">Hyungwon Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+J">Jinwoo Shin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> We present BootComp, a novel framework based on text-to-image diffusion models for controllable human image generation with multiple reference garments. Here, the main bottleneck is data acquisition for training: collecting a large-scale dataset of high-quality reference garment images per human subject is quite challenging, i.e., ideally, one needs to manually gather every single garment photograph worn by each human. To address this, we propose a data generation pipeline to construct a large synthetic dataset consisting of human and multiple-garment pairs, by introducing a model that extracts reference garment images from each human image. To ensure data quality, we also propose a filtering strategy that removes undesirable generated data by measuring perceptual similarity between the garment presented in the human image and the extracted garment. Finally, using the constructed synthetic dataset, we train a diffusion model with two parallel denoising paths that use multiple garment images as conditions to generate human images while preserving their fine-grained details. We further show the wide applicability of our framework by adapting it to different types of reference-based generation in the fashion domain, including virtual try-on and controllable human image generation with other conditions, e.g., pose, face, etc. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://yisol.github.io/BootComp</span> </p> </li>
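<p class="is-size-7">The filtering step in the BootComp abstract (drop synthetic pairs whose extracted garment is perceptually dissimilar to the garment visible in the human image) can be sketched as below. This is an assumption-laden illustration: the abstract does not name a perceptual metric, so we use the off-the-shelf <code>lpips</code> package as a stand-in, and the threshold is hypothetical.</p>
<pre><code class="language-python">import lpips
import torch

# LPIPS perceptual distance (lower = more similar); a stand-in metric here.
metric = lpips.LPIPS(net='alex')

def keep_pair(garment_crop: torch.Tensor, extracted_garment: torch.Tensor,
              max_dist: float = 0.35) -> bool:
    """Both inputs: (1, 3, H, W) tensors scaled to [-1, 1]. Keep the synthetic
    pair only if the extracted garment is perceptually close to the garment
    seen in the human image."""
    with torch.no_grad():
        dist = metric(garment_crop, extracted_garment).item()
    return dist &lt;= max_dist
</code></pre>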
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14788">arXiv:2411.14788</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14788">pdf</a>, <a href="https://arxiv.org/format/2411.14788">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Jovis: A Visualization Tool for PostgreSQL Query Optimizer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yoojin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+J">Juhee Han</a>, <a href="/search/cs?searchtype=author&amp;query=Koo%2C+K">Kyoseung Koo</a>, <a href="/search/cs?searchtype=author&amp;query=Moon%2C+B">Bongki Moon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> In the world of relational database management, the query optimizer is a critical component that significantly impacts query performance. To address the challenge of understanding query optimization despite the complexity of optimizers -- especially with join operations -- we introduce Jovis, a novel visualization tool that provides a window into the often intricate process of query optimization in PostgreSQL, making it more accessible and understandable. PostgreSQL employs two different query optimization strategies: the Dynamic Programming (DP) Optimizer for most scenarios and the Genetic Query Optimizer (GEQO) for more complex queries with numerous joins, both of which are supported in Jovis. Our tool visualizes the optimizer&#39;s decision-making process, from evaluating access paths for each relation to determining join orderings, all using data derived from the optimizer&#39;s logs. Jovis not only clarifies the query optimization process through visualizations but also serves as a valuable learning tool for newcomers and a practical resource for experienced database professionals looking to optimize their query performance or even the query optimizer itself. The source code is available at https://github.com/snu-jovis. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14788v1-abstract-full').style.display = 'none'; document.getElementById('2411.14788v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11302">arXiv:2411.11302</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11302">pdf</a>, <a href="https://arxiv.org/format/2411.11302">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Personalized Brain-Computer Interface Application Based on Endogenous EEG Paradigms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kwak%2C+H">Heon-Gyu Kwak</a>, <a href="/search/cs?searchtype=author&amp;query=Shin%2C+G">Gi-Hwan Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yeon-Woo Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Dong-Hoon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Jeon%2C+Y">Yoo-In Jeon</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jun-Su Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seong-Whan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11302v1-abstract-short" style="display: inline;"> In this paper, we propose a conceptual framework for personalized brain-computer interface (BCI) applications, which can offer an enhanced user experience by customizing services to individual preferences and needs, based on endogenous electroencephalography (EEG) paradigms including motor imagery (MI), speech imagery (SI), and visual imagery. The framework includes two essential components: user&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11302v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11302v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11302v1-abstract-full" style="display: none;"> In this paper, we propose a conceptual framework for personalized brain-computer interface (BCI) applications, which can offer an enhanced user experience by customizing services to individual preferences and needs, based on endogenous electroencephalography (EEG) paradigms including motor imagery (MI), speech imagery (SI), and visual imagery. The framework includes two essential components: user identification and intention classification, which enable personalized services by identifying individual users and recognizing their intended actions through EEG signals. We validate the feasibility of our framework using a private EEG dataset collected from eight subjects, employing the ShallowConvNet architecture to decode EEG features. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10005">arXiv:2411.10005</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10005">pdf</a>, <a href="https://arxiv.org/format/2411.10005">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Analyzing Performance Characteristics of PostgreSQL and MariaDB on NVMeVirt </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+J">Juhee Han</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yoojin Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The NVMeVirt paper analyzes the implications of storage performance for database engine performance to promote the tunable performance of NVMeVirt. It examines two very popular database engines, MariaDB and PostgreSQL. The results show that MariaDB is more efficient when the storage is slow, but PostgreSQL outperforms MariaDB as I/O bandwidth increases. Although this verifies that NVMeVirt can support advanced storage bandwidth configurations, the paper does not clearly explain why the two database engines react so differently to storage performance. To understand why these two engines have different performance characteristics, we conduct a study of the database engines&#39; internals. We focus on three major differences in their Multi-Version Concurrency Control (MVCC) implementations: version storage, garbage collection, and index management. We also evaluate each scheme&#39;s I/O overhead using an OLTP workload. Our analysis identifies the reason why MariaDB outperforms PostgreSQL when the bandwidth is low. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
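<p class="is-size-7">One concrete way to observe the garbage-collection difference discussed above: PostgreSQL keeps old row versions in the heap until VACUUM reclaims them, so dead-tuple counts are directly visible in its statistics views. The snippet below is a generic illustration (connection details hypothetical), not taken from the paper.</p>
<pre><code class="language-python">import psycopg2

# Inspect MVCC garbage-collection pressure: dead tuples awaiting VACUUM
# accumulate in PostgreSQL's heap because old row versions are stored in place.
conn = psycopg2.connect("dbname=bench user=postgres")
with conn.cursor() as cur:
    cur.execute("""
        SELECT relname, n_live_tup, n_dead_tup
        FROM pg_stat_user_tables
        ORDER BY n_dead_tup DESC
        LIMIT 5;
    """)
    for table, live, dead in cur.fetchall():
        print(f"{table}: {live} live / {dead} dead tuples")
conn.close()
</code></pre>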
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09997">arXiv:2411.09997</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09997">pdf</a>, <a href="https://arxiv.org/format/2411.09997">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> DBenVis: A Visual Analytics System for Comparing DBMS Performance via Benchmark Programs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yoojin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+J">Juhee Han</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+D">Daehyun Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Database benchmarking is an essential method for evaluating and comparing the performance characteristics of database management systems (DBMSs). It helps researchers and developers evaluate the efficacy of their optimizations or newly developed DBMS solutions. Companies can also benefit by analyzing the performance of DBMSs under specific workloads and leveraging the results to select the most suitable system for their needs. Proper interpretation of raw benchmark results requires effective visualization, which helps users gain meaningful insights. However, visualization of the results requires prior knowledge, and existing approaches often involve time-consuming manual tasks, owing to the absence of a unified visual analytics system for benchmark results across diverse DBMSs. To address these challenges, we present DBenVis, an interactive visual analytics system that provides efficient and versatile visualization of benchmark results. DBenVis supports both online transaction processing (OLTP) and online analytical processing (OLAP) benchmarks, and provides an interactive comparison view that enables users to perform in-depth analysis of performance characteristics across various metrics among different DBMSs. Notably, we devise an interactive visual encoding idiom for OLAP benchmarks that represents a query execution plan as a tree. In building the system, we propose novel techniques for parsing meaningful data from raw benchmark results and for converting a query plan to the D3 hierarchical format. Through case studies conducted with domain experts, we demonstrate the efficacy and usability of DBenVis. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
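<p class="is-size-7">Converting a query execution plan to the D3 hierarchical format, as the DBenVis abstract describes, is essentially a tree walk: PostgreSQL&#39;s <code>EXPLAIN (FORMAT JSON)</code> output nests child plans under a "Plans" key, while D3 hierarchy layouts expect objects with "name" and "children". A minimal sketch of that conversion (ours, not the paper&#39;s code):</p>
<pre><code class="language-python">def plan_to_d3(plan_node: dict) -> dict:
    """Map one PostgreSQL EXPLAIN (FORMAT JSON) plan node to a D3 hierarchy node."""
    return {
        "name": plan_node["Node Type"],            # e.g., "Hash Join", "Seq Scan"
        "cost": plan_node.get("Total Cost"),
        "children": [plan_to_d3(child) for child in plan_node.get("Plans", [])],
    }

# Usage: EXPLAIN (FORMAT JSON) returns a list like [{"Plan": {...}}]
# d3_tree = plan_to_d3(explain_output[0]["Plan"])
</code></pre>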
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08404">arXiv:2411.08404</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.08404">pdf</a>, <a href="https://arxiv.org/format/2411.08404">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Quantifying Qualitative Insights: Leveraging LLMs to Market Predict </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hoyoung Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Youngsoo Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kwon%2C+Y">Yuhee Kwon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Recent advancements in Large Language Models (LLMs) have the potential to transform financial analytics by integrating numerical and textual data. However, challenges such as insufficient context when fusing multimodal information and the difficulty of measuring the utility of qualitative outputs, which LLMs generate as text, have limited their effectiveness in tasks such as financial forecasting. This study addresses these challenges by leveraging daily reports from securities firms to create high-quality contextual information. The reports are segmented into text-based key factors and combined with numerical data, such as price information, to form context sets. By dynamically updating few-shot examples based on the query time, the context sets incorporate the latest information and remain closely aligned with the query point. Additionally, a crafted prompt is designed to assign scores to the key factors, converting qualitative insights into quantitative results. The derived scores then undergo a scaling process that transforms them into real-world values used for prediction. Our experiments demonstrate that LLMs outperform time-series models in market forecasting, though challenges such as imperfect reproducibility and limited explainability remain. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 4 figures</span> </p> </li>
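<p class="is-size-7">The "score the key factors, then scale to real-world values" step described above can be pictured with a small sketch. Everything here is a hypothetical illustration: the prompt wording, the score range, and the linear scaling are our assumptions, and <code>ask_llm</code> stands in for any chat-completion call.</p>
<pre><code class="language-python">def score_key_factors(ask_llm, key_factors, low=-5, high=5):
    """Ask an LLM to rate each extracted report factor for next-day impact."""
    scores = []
    for factor in key_factors:
        prompt = (f"Rate the likely impact of the following factor on "
                  f"tomorrow's stock return, as an integer in [{low}, {high}]:\n"
                  f"{factor}\nAnswer with the integer only.")
        scores.append(int(ask_llm(prompt)))
    return scores

def scale_to_return(scores, max_abs_return=0.03, high=5):
    """Map the average score linearly onto a real-world daily return."""
    mean = sum(scores) / len(scores)
    return (mean / high) * max_abs_return   # e.g., mean score 5 -> +3% move
</code></pre>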
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07461">arXiv:2411.07461</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07461">pdf</a>, <a href="https://arxiv.org/format/2411.07461">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> BLIP3-KALE: Knowledge Augmented Large-Scale Dense Captions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Awadalla%2C+A">Anas Awadalla</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+L">Le Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+M">Manli Shu</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+A">An Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Purushwalkam%2C+S">Senthil Purushwalkam</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+S">Sheng Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hannah Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Lo%2C+O">Oscar Lo</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J+S">Jae Sung Park</a>, <a href="/search/cs?searchtype=author&amp;query=Guha%2C+E">Etash Guha</a>, <a href="/search/cs?searchtype=author&amp;query=Savarese%2C+S">Silvio Savarese</a>, <a href="/search/cs?searchtype=author&amp;query=Schmidt%2C+L">Ludwig Schmidt</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+C">Caiming Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+R">Ran Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> We introduce BLIP3-KALE, a dataset of 218 million image-text pairs that bridges the gap between descriptive synthetic captions and factual web-scale alt-text. KALE augments synthetic dense image captions with web-scale alt-text to generate factually grounded image captions. Our two-stage approach leverages large vision-language models and language models to create knowledge-augmented captions, which are then used to train a specialized VLM for scaling up the dataset. We train vision-language models on KALE and demonstrate improvements on vision-language tasks. Our experiments show the utility of KALE for training more capable and knowledgeable multimodal models. We release the KALE dataset at https://huggingface.co/datasets/Salesforce/blip3-kale </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06367">arXiv:2411.06367</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06367">pdf</a>, <a href="https://arxiv.org/format/2411.06367">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> BayesNAM: Leveraging Inconsistency for Reliable Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hoki Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jinseong Park</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yujin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seungyun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Jaewook Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The neural additive model (NAM) is a recently proposed explainable artificial intelligence (XAI) method built on neural network architectures. Given the advantages of neural networks, NAMs provide intuitive explanations for their predictions along with high model performance. In this paper, we analyze a critical yet overlooked phenomenon: NAMs often produce inconsistent explanations, even when using the same architecture and dataset. Traditionally, such inconsistencies have been viewed as issues to be resolved. We argue instead that these inconsistencies can provide valuable information about the given data and model. Through a simple theoretical framework, we demonstrate that these inconsistencies are not mere artifacts but emerge naturally in datasets with multiple important features. To effectively leverage this information, we introduce a novel framework, the Bayesian Neural Additive Model (BayesNAM), which integrates Bayesian neural networks and feature dropout, with a theoretical proof demonstrating that feature dropout effectively captures model inconsistencies. Our experiments demonstrate that BayesNAM effectively reveals potential problems such as insufficient data or structural limitations of the model, providing more reliable explanations and potential remedies. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Review</span> </p> </li>
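<p class="is-size-7">A sketch of the two ingredients named in the BayesNAM abstract: a neural additive model sums one small subnetwork per feature, and feature dropout randomly zeroes whole per-feature contributions during training. This is our own minimal illustration of those ideas, not the authors&#39; implementation.</p>
<pre><code class="language-python">import torch
import torch.nn as nn

class NAMWithFeatureDropout(nn.Module):
    """Additive model: prediction = sum_i f_i(x_i); feature dropout drops
    entire per-feature contributions (illustrative sizes)."""
    def __init__(self, n_features, hidden=32, p_drop=0.2):
        super().__init__()
        self.nets = nn.ModuleList(
            nn.Sequential(nn.Linear(1, hidden), nn.ReLU(), nn.Linear(hidden, 1))
            for _ in range(n_features)
        )
        self.p_drop = p_drop

    def forward(self, x):                       # x: (batch, n_features)
        contribs = torch.cat(
            [net(x[:, i:i + 1]) for i, net in enumerate(self.nets)], dim=1
        )                                       # (batch, n_features)
        if self.training:
            keep = (torch.rand_like(contribs) &gt; self.p_drop).float()
            contribs = contribs * keep / (1 - self.p_drop)
        return contribs.sum(dim=1)              # (batch,)
</code></pre>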
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05846">arXiv:2411.05846</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05846">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Reducing catastrophic forgetting of incremental learning in the absence of rehearsal memory with task-specific token </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y+J">Young Jo Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Yoo%2C+M+K">Min Kyoon Yoo</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+Y+R">Yu Rang Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Deep learning models generally display catastrophic forgetting when learning new data continuously. Many incremental learning approaches address this problem by reusing data from previous tasks while learning new tasks. However, direct access to past data raises privacy and security concerns. To address these issues, we present a novel method that preserves previous knowledge without storing previous data. This method is inspired by the architecture of a vision transformer and employs a unique token capable of encapsulating the compressed knowledge of each task. The approach generates task-specific embeddings by directing attention differently based on the task associated with the data, thereby effectively mimicking the effect of having multiple models through tokens. Our method also incorporates a distillation process that ensures efficient interactions even after multiple additional learning steps, thereby optimizing the model against forgetting. We measured the performance of our model in terms of accuracy and backward transfer using a benchmark dataset for different task-incremental learning scenarios. Our results demonstrate the superiority of our approach, which achieved the highest accuracy and lowest backward transfer among the compared methods. In addition to presenting a new model, our approach lays the foundation for various extensions within the spectrum of vision-transformer architectures. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
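<p class="is-size-7">The task-specific token idea above can be sketched in a few lines: a learned per-task token is prepended to the patch sequence so attention is routed differently for each task. Our illustrative reconstruction (class name, sizes, and wiring are hypothetical):</p>
<pre><code class="language-python">import torch
import torch.nn as nn

class TaskTokenViT(nn.Module):
    """Wrap a transformer encoder with one learnable token per task."""
    def __init__(self, encoder: nn.Module, dim=384, n_tasks=5):
        super().__init__()
        self.encoder = encoder                      # any ViT-style encoder
        self.task_tokens = nn.Parameter(torch.zeros(n_tasks, dim))

    def forward(self, patch_embeddings, task_id: int):
        # patch_embeddings: (batch, n_patches, dim)
        b = patch_embeddings.size(0)
        token = self.task_tokens[task_id].reshape(1, 1, -1).expand(b, -1, -1)
        x = torch.cat([token, patch_embeddings], dim=1)
        out = self.encoder(x)                       # attention steered by task token
        return out[:, 0]                            # task-specific embedding
</code></pre>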
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05384">arXiv:2411.05384</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05384">pdf</a>, <a href="https://arxiv.org/format/2411.05384">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Advancing Meteorological Forecasting: AI-based Approach to Synoptic Weather Map Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yo-Hwan Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+S">Seon-Yu Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Cheon%2C+M">Minjong Cheon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> As global warming increases the complexity of weather patterns, the precision of weather forecasting becomes increasingly important. Our study proposes a novel preprocessing method and a convolutional autoencoder model developed to improve the interpretation of synoptic weather maps, which are critical for meteorologists seeking a thorough understanding of weather conditions. The model can recognize historical synoptic weather maps that nearly match current atmospheric conditions, marking a significant step forward in meteorological forecasting technology. Our study covers unsupervised learning models like VQ-VAE as well as supervised models trained on the ImageNet dataset, such as VGG16, VGG19, Xception, InceptionV3, and ResNet50, along with newer models like EfficientNet and ConvNeXt. Our findings show that, while these models perform well in various settings, their ability to identify comparable synoptic weather maps has certain limits. Motivated by the primary goal of significantly increasing meteorologists&#39; efficiency in labor-intensive tasks, our research found that cosine similarity is the most effective metric, as determined by a combination of quantitative and qualitative assessments, for accurately identifying relevant historical weather patterns. This study broadens our understanding by shifting the emphasis from numerical precision to practical application, ensuring that our model is effective in theory, practical in use, and accessible in the complex and dynamic field of meteorology. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
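<p class="is-size-7">The retrieval step described above (find the historical synoptic maps most similar to today&#39;s) reduces to cosine similarity over embedding vectors from a pretrained model. A minimal sketch under our own assumptions; any of the listed backbones could produce the embeddings:</p>
<pre><code class="language-python">import numpy as np

def top_k_similar(query_vec, archive_vecs, k=5):
    """archive_vecs: (N, d) embeddings of historical weather maps;
    query_vec: (d,) embedding of the current map."""
    q = query_vec / np.linalg.norm(query_vec)
    a = archive_vecs / np.linalg.norm(archive_vecs, axis=1, keepdims=True)
    sims = a @ q                        # cosine similarity against the archive
    idx = np.argsort(-sims)[:k]         # indices of the k closest historical maps
    return idx, sims[idx]
</code></pre>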
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05254">arXiv:2411.05254</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05254">pdf</a>, <a href="https://arxiv.org/format/2411.05254">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Visual Feature Aggregation for OCR-Free Document Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jaeyoo Park</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+J+Y">Jin Young Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jeonghyung Park</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+B">Bohyung Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> We present a novel OCR-free document understanding framework based on pretrained Multimodal Large Language Models (MLLMs). Our approach employs multi-scale visual features to effectively handle various font sizes within document images. To address the increasing cost of feeding multi-scale visual inputs to MLLMs, we propose the Hierarchical Visual Feature Aggregation (HVFA) module, designed to reduce the number of input tokens to LLMs. Leveraging a feature pyramid with cross-attentive pooling, our approach effectively manages the trade-off between information loss and efficiency without being affected by varying document image sizes. Furthermore, we introduce a novel instruction tuning task that facilitates the model&#39;s text-reading capability by learning to predict the relative positions of input text, minimizing the risk of truncated text caused by the limited capacity of LLMs. Comprehensive experiments validate the effectiveness of our approach, demonstrating superior performance in various document understanding tasks. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li>
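<p class="is-size-7">Cross-attentive pooling of the kind the HVFA abstract mentions can be sketched as a small cross-attention block that compresses many visual tokens into a fixed, smaller set before they reach the LLM. This is only our schematic reading of the abstract, not the paper&#39;s module: we use learned query tokens as a stand-in for the pyramid-derived queries.</p>
<pre><code class="language-python">import torch
import torch.nn as nn

class CrossAttentivePool(nn.Module):
    """Compress n_tokens -> n_queries visual tokens via cross-attention."""
    def __init__(self, dim=768, n_queries=64, n_heads=8):
        super().__init__()
        self.queries = nn.Parameter(torch.randn(n_queries, dim) * 0.02)
        self.attn = nn.MultiheadAttention(dim, n_heads, batch_first=True)

    def forward(self, visual_tokens):          # (batch, n_tokens, dim)
        b = visual_tokens.size(0)
        q = self.queries.unsqueeze(0).expand(b, -1, -1)
        pooled, _ = self.attn(q, visual_tokens, visual_tokens)
        return pooled                          # (batch, n_queries, dim)
</code></pre>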
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03554">arXiv:2411.03554</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03554">pdf</a>, <a href="https://arxiv.org/format/2411.03554">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Vision Language Model Unlearning via Fictitious Facial Identity Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yingzi Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiongxiao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+F">Fei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+S">Siyuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiazhao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiujun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+F">Furong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+L">Lichao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Muhao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+C">Chaowei Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Machine unlearning has emerged as an effective strategy for forgetting specific information in the training data. However, with the increasing integration of visual data, privacy concerns in Vision Language Models (VLMs) remain underexplored. To address this, we introduce the Facial Identity Unlearning Benchmark (FIUBench), a novel VLM unlearning benchmark designed to robustly evaluate the effectiveness of unlearning algorithms under the Right to be Forgotten setting. Specifically, we formulate the VLM unlearning task by constructing the Fictitious Facial Identity VQA dataset and apply a two-stage evaluation pipeline that is designed to precisely control the sources of information and their exposure levels. In terms of evaluation, since VLMs support many ways of asking questions with the same semantic meaning, we also provide robust evaluation metrics, including membership inference attacks and carefully designed adversarial privacy attacks, to evaluate the performance of algorithms. Through the evaluation of four baseline VLM unlearning algorithms within FIUBench, we find that all methods remain limited in their unlearning performance, with significant trade-offs between model utility and forget quality. Furthermore, our findings also highlight the importance of privacy attacks for robust evaluations. We hope FIUBench will drive progress in developing more effective VLM unlearning algorithms. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01019">arXiv:2411.01019</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01019">pdf</a>, <a href="https://arxiv.org/format/2411.01019">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A lightweight Convolutional Neural Network based on U shape structure and Attention Mechanism for Anterior Mediastinum Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Soleimani-Fard%2C+S">Sina Soleimani-Fard</a>, <a href="/search/cs?searchtype=author&amp;query=Jeong%2C+W+G">Won Gi Jeong</a>, <a href="/search/cs?searchtype=author&amp;query=Ripalda%2C+F+F">Francis Ferri Ripalda</a>, <a href="/search/cs?searchtype=author&amp;query=Sasani%2C+H">Hasti Sasani</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Younhee Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Deiva%2C+S">S Deiva</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+G+Y">Gong Yong Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Ko%2C+S">Seok-bum Ko</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> To automatically detect Anterior Mediastinum Lesions (AMLs) in the Anterior Mediastinum (AM), the primary requirement is an automatic segmentation model specifically designed for the AM. The prevalence of AML is extremely low, making it challenging to conduct screening research similar to lung cancer screening, and retrospectively reviewing chest CT scans over a specific period to investigate the prevalence of AML requires substantial time. Therefore, developing an Artificial Intelligence (AI) model that locates the AM helps radiologists manage their workloads and improve diagnostic accuracy for AMLs. In this paper, we introduce a U-shaped network to segment the AM. Two attention mechanisms are used to maintain long-range dependencies and localization. To combine the potential of Multi-Head Self-Attention (MHSA) with a lightweight network, we designed a parallel MHSA named Wide-MHSA (W-MHSA). Maintaining long-range dependencies is crucial for segmentation when feature maps are upsampled; we therefore designed a Dilated Depth-Wise Parallel Path connection (DDWPP) for this purpose. To keep the architecture lightweight, we introduced an expanding convolution block and combined it with the proposed W-MHSA for feature extraction in the encoder of the proposed U-shaped network. The proposed network was trained on 2775 AM cases and obtained an average Dice Similarity Coefficient (DSC) of 87.83%, a mean Intersection over Union (IoU) of 79.16%, and a sensitivity of 89.60%. Our architecture exhibited superior segmentation performance compared to advanced segmentation networks such as TransUNet, Attention U-Net, ResUNet, and ResUNet++. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00204">arXiv:2411.00204</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.00204">pdf</a>, <a href="https://arxiv.org/format/2411.00204">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RESTOR: Knowledge Recovery through Machine Unlearning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Rezaei%2C+K">Keivan Rezaei</a>, <a href="/search/cs?searchtype=author&amp;query=Chandu%2C+K">Khyathi Chandu</a>, <a href="/search/cs?searchtype=author&amp;query=Feizi%2C+S">Soheil Feizi</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Brahman%2C+F">Faeze Brahman</a>, <a href="/search/cs?searchtype=author&amp;query=Ravichander%2C+A">Abhilasha Ravichander</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Large language models trained on web-scale corpora can memorize undesirable datapoints such as incorrect facts, copyrighted content, or sensitive data. Recently, many machine unlearning algorithms have been proposed that aim to &#39;erase&#39; these datapoints from trained models -- that is, revert model behavior to be similar to a model that had never been trained on these datapoints. However, evaluating the success of unlearning algorithms remains an open challenge. In this work, we propose the RESTOR framework for machine unlearning, which evaluates the ability of unlearning algorithms to perform targeted data erasure by evaluating the ability of models to forget the knowledge introduced in these datapoints while simultaneously recovering the knowledge state the model would have had if it had never encountered them. RESTOR helps uncover several novel insights about popular unlearning algorithms and the mechanisms through which they operate -- for instance, identifying that some algorithms merely emphasize forgetting, and that localizing unlearning targets can enhance unlearning performance. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19154">arXiv:2410.19154</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19154">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Cross Spline Net and a Unified World </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+L">Linwei Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y+J">Ye Jin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Nair%2C+V+N">Vijayan N. Nair</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> In today&#39;s machine learning world for tabular data, XGBoost and the fully connected neural network (FCNN) are the two most popular methods due to their good model performance and ease of use. However, they are highly complicated, hard to interpret, and prone to overfitting. In this paper, we propose a new modeling framework called cross spline net (CSN), based on a combination of spline transformation and cross-network (Wang et al. 2017, 2021). We show that CSN is just as performant and convenient to use, while being less complicated, more interpretable, and more robust. Moreover, the CSN framework is flexible, as the spline layer can be configured differently to yield different models. With different choices of the spline layer, we can reproduce or approximate a set of non-neural-network models, including linear and spline-based statistical models, trees, rule-fit, tree ensembles (gradient boosting trees, random forests), oblique trees/forests, multivariate adaptive regression splines (MARS), SVM with polynomial kernel, etc. Therefore, CSN provides a unified modeling framework that puts this set of non-neural-network models under the same neural network umbrella. By using the scalable and powerful gradient descent algorithms available in neural network libraries, CSN avoids some pitfalls (such as being ad hoc, greedy, or non-scalable) of the case-specific optimization methods used in the above non-neural-network models. We use a special type of CSN, TreeNet, to illustrate our point, and compare TreeNet with XGBoost and FCNN to show its benefits. We believe CSN will provide a flexible and convenient framework for practitioners to build performant, robust, and more interpretable models. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li>
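<p class="is-size-7">A rough sketch of the two CSN building blocks named above: each input feature passes through a spline basis expansion, and a cross layer then multiplies transformed features against the original input to capture interactions, following the cross-network formulation of Wang et al. cited in the abstract. Knot placement, sizes, and wiring here are our own assumptions.</p>
<pre><code class="language-python">import torch
import torch.nn as nn

class SplineExpansion(nn.Module):
    """Per-feature ReLU spline basis: max(0, x - knot) for fixed knots."""
    def __init__(self, knots=(-2.0, -1.0, 0.0, 1.0, 2.0)):
        super().__init__()
        self.register_buffer("knots", torch.tensor(knots))

    def forward(self, x):                    # x: (batch, n_features)
        # (batch, n_features, n_knots) -> flattened spline features
        return torch.relu(x.unsqueeze(-1) - self.knots).flatten(1)

class CrossLayer(nn.Module):
    """x_{l+1} = x0 * (W x_l + b) + x_l  (cross-network-style feature crossing)."""
    def __init__(self, dim):
        super().__init__()
        self.lin = nn.Linear(dim, dim)

    def forward(self, x0, xl):
        return x0 * self.lin(xl) + xl
</code></pre>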

arXiv:2410.18823 [pdf, other] cs.CV cs.AI
Towards Visual Text Design Transfer Across Languages
Authors: Yejin Choi, Jiwan Chung, Sumin Shim, Giyeong Oh, Youngjae Yu
Abstract: Visual text design plays a critical role in conveying themes, emotions, and atmospheres in multimodal formats such as film posters and album covers. Translating these visual and textual elements across languages extends the concept of translation beyond mere text, requiring the adaptation of aesthetic and stylistic features. To address this, we introduce the novel task of multimodal style translation, along with MuST-Bench, a benchmark designed to evaluate the ability of visual text generation models to translate across different writing systems while preserving design intent. Our initial experiments on MuST-Bench reveal that existing visual text generation models struggle with the proposed task because textual descriptions are inadequate for conveying visual design. In response, we introduce SIGIL, a framework for multimodal style translation that eliminates the need for style descriptions. SIGIL enhances image generation models through three innovations: glyph latents for multilingual settings, pretrained VAEs for stable style guidance, and an OCR model with reinforcement-learning feedback for optimizing readable character generation. SIGIL outperforms existing baselines in style consistency and legibility while maintaining visual fidelity, setting itself apart from traditional description-based approaches. We release MuST-Bench publicly for broader use and exploration at https://huggingface.co/datasets/yejinc/MuST-Bench.
Submitted 29 October, 2024; v1 submitted 24 October, 2024; originally announced October 2024.
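
Since the benchmark is hosted on the Hugging Face Hub (see the link above), it can presumably be pulled with the `datasets` library; the split and column layout is an assumption, so check the dataset card before relying on it.

    # Assumed loading recipe; repository id taken from the release link above.
    from datasets import load_dataset

    must_bench = load_dataset("yejinc/MuST-Bench")
    print(must_bench)  # inspect the available splits and columns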

arXiv:2410.16665 [pdf, other] cs.CL cs.CY
SafetyAnalyst: Interpretable, transparent, and steerable safety moderation for AI behavior
Authors: Jing-Jing Li, Valentina Pyatkin, Max Kleiman-Weiner, Liwei Jiang, Nouha Dziri, Anne G. E. Collins, Jana Schaich Borg, Maarten Sap, Yejin Choi, Sydney Levine
Abstract: The ideal AI safety moderation system would be both structurally interpretable (so its decisions can be reliably explained) and steerable (to align with safety standards and reflect a community's values); current systems fall short on both counts. To address this gap, we present SafetyAnalyst, a novel AI safety moderation framework. Given an AI behavior, SafetyAnalyst uses chain-of-thought reasoning to analyze its potential consequences by creating a structured "harm-benefit tree," which enumerates the harmful and beneficial actions and effects the behavior may lead to, along with likelihood, severity, and immediacy labels describing the potential impact on each stakeholder. SafetyAnalyst then aggregates all harmful and beneficial effects into a harmfulness score using fully interpretable weight parameters, which can be aligned to particular safety preferences. We applied this conceptual framework to develop, test, and release an open-source LLM prompt-safety classification system, distilled from 18.5 million harm-benefit features generated by frontier LLMs on 19k prompts. On a comprehensive set of prompt-safety benchmarks, we show that SafetyReporter (average F1=0.81) outperforms existing LLM safety moderation systems (average F1<0.72) on prompt safety classification, while offering the additional advantages of interpretability, transparency, and steerability.
Submitted 31 January, 2025; v1 submitted 21 October, 2024; originally announced October 2024.
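
A hedged sketch of the aggregation step described above: effect nodes of the harm-benefit tree are collapsed into a scalar harmfulness score via interpretable weights. Field names and weight values here are illustrative, not the released system's.

    # Each effect carries likelihood/severity/immediacy labels in [0, 1];
    # benefits subtract from the score, harms add to it.
    def harmfulness(effects, weights):
        score = 0.0
        for e in effects:
            impact = (weights["likelihood"] * e["likelihood"]
                      + weights["severity"] * e["severity"]
                      + weights["immediacy"] * e["immediacy"])
            score += -impact if e["benefit"] else impact
        return score

    effects = [
        {"benefit": False, "likelihood": 0.8, "severity": 0.9, "immediacy": 0.5},
        {"benefit": True,  "likelihood": 0.6, "severity": 0.2, "immediacy": 0.9},
    ]
    weights = {"likelihood": 0.5, "severity": 0.4, "immediacy": 0.1}
    print(harmfulness(effects, weights))  # steerable via the weight vector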

arXiv:2410.15008 [pdf, other] cs.AR; doi: 10.1145/3620666.3651324
IANUS: Integrated Accelerator based on NPU-PIM Unified Memory System
Authors: Minseok Seo, Xuan Truong Nguyen, Seok Joong Hwang, Yongkee Kwon, Guhyun Kim, Chanwook Park, Ilkon Kim, Jaehan Park, Jeongbin Kim, Woojae Shin, Jongsoon Won, Haerang Choi, Kyuyoung Kim, Daehan Kwon, Chunseok Jeong, Sangheon Lee, Yongseok Choi, Wooseok Byun, Seungcheol Baek, Hyuk-Jae Lee, John Kim
href="/search/cs?searchtype=author&amp;query=Choi%2C+H">Haerang Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+K">Kyuyoung Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kwon%2C+D">Daehan Kwon</a>, <a href="/search/cs?searchtype=author&amp;query=Jeong%2C+C">Chunseok Jeong</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sangheon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yongseok Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Byun%2C+W">Wooseok Byun</a>, <a href="/search/cs?searchtype=author&amp;query=Baek%2C+S">Seungcheol Baek</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hyuk-Jae Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">John Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15008v1-abstract-short" style="display: inline;"> Accelerating end-to-end inference of transformer-based large language models (LLMs) is a critical component of AI services in datacenters. However, diverse compute characteristics of end-to-end LLM inference present challenges as previously proposed accelerators only address certain operations or stages (e.g., self-attention, generation stage, etc.). To address the unique challenges of acceleratin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15008v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15008v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15008v1-abstract-full" style="display: none;"> Accelerating end-to-end inference of transformer-based large language models (LLMs) is a critical component of AI services in datacenters. However, diverse compute characteristics of end-to-end LLM inference present challenges as previously proposed accelerators only address certain operations or stages (e.g., self-attention, generation stage, etc.). To address the unique challenges of accelerating end-to-end inference, we propose IANUS -- Integrated Accelerator based on NPU-PIM Unified Memory System. IANUS is a domain-specific system architecture that combines a Neural Processing Unit (NPU) with a Processing-in-Memory (PIM) to leverage both the NPU&#39;s high computation throughput and the PIM&#39;s high effective memory bandwidth. In particular, IANUS employs a unified main memory system where the PIM memory is used both for PIM operations and for NPU&#39;s main memory. The unified main memory system ensures that memory capacity is efficiently utilized and the movement of shared data between NPU and PIM is minimized. However, it introduces new challenges since normal memory accesses and PIM computations cannot be performed simultaneously. Thus, we propose novel PIM Access Scheduling that manages normal memory accesses and PIM computations through workload mapping and scheduling across the PIM and the NPU. Our detailed simulation evaluations show that IANUS improves the performance of GPT-2 by 6.2$\times$ and 3.2$\times$, on average, compared to the NVIDIA A100 GPU and the state-of-the-art accelerator. As a proof-of-concept, we develop a prototype of IANUS with a commercial PIM, NPU, and an FPGA-based PIM controller to demonstrate the feasibility of IANUS. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15008v1-abstract-full').style.display = 'none'; document.getElementById('2410.15008v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Updated version of the paper accepted to ASPLOS 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ASPLOS 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14632">arXiv:2410.14632</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14632">pdf</a>, <a href="https://arxiv.org/format/2410.14632">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Diverging Preferences: When do Annotators Disagree and do Models Know? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M+J">Michael JQ Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhilin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Hwang%2C+J+D">Jena D. Hwang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yi Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Delalleau%2C+O">Olivier Delalleau</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+E">Eunsol Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+X">Xiang Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Pyatkin%2C+V">Valentina Pyatkin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14632v2-abstract-short" style="display: inline;"> We examine diverging preferences in human-labeled preference datasets. We develop a taxonomy of disagreement sources spanning 10 categories across four high-level classes -- task underspecification, response style, refusals, and annotation errors. We find that the majority of disagreements are in opposition with standard reward modeling approaches, which are designed with the assumption that annot&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14632v2-abstract-full').style.display = 'inline'; document.getElementById('2410.14632v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14632v2-abstract-full" style="display: none;"> We examine diverging preferences in human-labeled preference datasets. We develop a taxonomy of disagreement sources spanning 10 categories across four high-level classes -- task underspecification, response style, refusals, and annotation errors. 

arXiv:2410.14632 [pdf, other] cs.CL
Diverging Preferences: When do Annotators Disagree and do Models Know?
Authors: Michael JQ Zhang, Zhilin Wang, Jena D. Hwang, Yi Dong, Olivier Delalleau, Yejin Choi, Eunsol Choi, Xiang Ren, Valentina Pyatkin
Abstract: We examine diverging preferences in human-labeled preference datasets. We develop a taxonomy of disagreement sources spanning 10 categories across four high-level classes -- task underspecification, response style, refusals, and annotation errors. We find that the majority of disagreements stand in opposition to standard reward modeling approaches, which are designed on the assumption that annotator disagreement is noise. We then explore how these findings impact two areas of LLM development: reward modeling and evaluation. In our experiments, we demonstrate how standard reward modeling methods, like the Bradley-Terry model, fail to differentiate whether a given preference judgment is the result of unanimous agreement among annotators or the majority opinion among diverging user preferences. These tendencies are echoed by popular LLM-as-Judge evaluation methods, which consistently identify a winning response even in cases of diverging preferences. These findings highlight remaining challenges in LLM evaluations, which are greatly influenced by divisive features like response style, and in developing pluralistically aligned LLMs. To address these issues, we develop methods for identifying diverging preferences to mitigate their influence on evaluation and training.
Submitted 6 November, 2024; v1 submitted 18 October, 2024; originally announced October 2024.
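
The Bradley-Terry model mentioned above reduces every pairwise judgment to a scalar reward gap, which is why it cannot tell a unanimous-but-noisy 70/30 outcome from a genuinely divided one; a one-line sketch:

    import math

    # P(a preferred over b) = sigmoid(r_a - r_b): the preference rate depends
    # only on the gap, not on why annotators split that way.
    def bt_prob(reward_a, reward_b):
        return 1.0 / (1.0 + math.exp(reward_b - reward_a))

    print(bt_prob(1.0, 0.15))  # ~0.70, whatever the source of the 70/30 split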

arXiv:2410.14001 [pdf, other] cs.LG cs.CL
Personalized Adaptation via In-Context Preference Learning
Authors: Allison Lau, Younwoo Choi, Vahid Balazadeh, Keertana Chidambaram, Vasilis Syrgkanis, Rahul G. Krishnan
Abstract: Reinforcement Learning from Human Feedback (RLHF) is widely used to align Language Models (LMs) with human preferences. However, existing approaches often neglect individual user preferences, leading to suboptimal personalization. We present the Preference Pretrained Transformer (PPT), a novel approach for adaptive personalization using online user feedback. PPT leverages the in-context learning capabilities of transformers to adapt dynamically to individual preferences. Our approach consists of two phases: (1) an offline phase, in which we train a single policy model using a history-dependent loss function, and (2) an online phase, in which the model adapts to user preferences through in-context learning. We demonstrate PPT's effectiveness in a contextual bandit setting, showing that it achieves personalized adaptation superior to existing methods while significantly reducing computational costs. Our results suggest the potential of in-context learning for scalable and efficient personalization in large language models.
Submitted 17 October, 2024; originally announced October 2024.
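
A toy analogue of the online phase, assuming a 3-arm bandit stands in for response generation: all adaptation happens by conditioning on the accumulated feedback history (simple counts here), with no weight updates. PPT itself replaces these counts with a transformer trained offline on feedback histories.

    import numpy as np

    rng = np.random.default_rng(0)
    true_user_pref = np.array([0.2, 0.9, 0.4])   # hidden per-arm preference

    counts, wins = np.ones(3), np.ones(3)        # smoothed history statistics
    for t in range(300):
        # Pick the empirically best arm, with a little noise for exploration.
        arm = int(np.argmax(wins / counts + rng.normal(0, 0.1, 3)))
        wins[arm] += rng.random() < true_user_pref[arm]
        counts[arm] += 1

    print(int(np.argmax(wins / counts)))         # converges to arm 1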

arXiv:2410.13648 [pdf, other] cs.CL cs.AI
SimpleToM: Exposing the Gap between Explicit ToM Inference and Implicit ToM Application in LLMs
Authors: Yuling Gu, Oyvind Tafjord, Hyunwoo Kim, Jared Moore, Ronan Le Bras, Peter Clark, Yejin Choi
Abstract: While prior work has explored whether large language models (LLMs) possess a "theory of mind" (ToM) -- the ability to attribute mental states to oneself and others -- there has been little work testing whether LLMs can implicitly apply such knowledge to predict behavior, or to judge whether an observed behavior is rational. Such skills are critical for appropriate interaction in social environments. We create a new dataset, SimpleToM, containing concise, diverse stories (e.g., "The can of Pringles has moldy chips in it. Mary picks up the can in the supermarket and walks to the cashier."), each with three questions that test different degrees of ToM reasoning, asking models to predict (a) mental state ("Is Mary aware of the mold?"), (b) behavior ("Will Mary pay for the chips or report the mold?"), and (c) judgment ("Mary paid for the chips. Was that reasonable?"). To our knowledge, SimpleToM is the first dataset to systematically explore downstream reasoning that requires knowledge of mental states in realistic scenarios.
Our experimental results are intriguing: while most models can reliably predict mental state on our dataset (a), they often fail to correctly predict the behavior (b), and fare even worse at judging whether given behaviors are reasonable (c), even though correctly inferring the protagonist's mental state should make such secondary predictions obvious. We further show that we can help models do better at (b) and (c) via interventions such as reminding the model of its earlier mental-state answer and mental-state-specific chain-of-thought prompting, raising action prediction accuracies (e.g., from 49.5% to 93.5% for GPT-4o) and judgment accuracies (e.g., from 15.3% to 94.7% for GPT-4o). While this shows that models can be coaxed to perform well, it requires task-specific interventions, and natural model performance remains low, a cautionary tale for LLM deployment.
Submitted 17 October, 2024; originally announced October 2024.
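
The story and questions quoted in the abstract suggest the following item shape; the field names are illustrative, not the released dataset's schema.

    # One SimpleToM-style item: a single story probed at three ToM depths,
    # each scored separately so the (a)->(b)->(c) gap is visible.
    item = {
        "story": ("The can of Pringles has moldy chips in it. Mary picks up "
                  "the can in the supermarket and walks to the cashier."),
        "questions": {
            "mental_state": "Is Mary aware of the mold?",
            "behavior": "Will Mary pay for the chips or report the mold?",
            "judgment": "Mary paid for the chips. Was that reasonable?",
        },
    }
    for qtype, question in item["questions"].items():
        print(f"[{qtype}] {question}")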

arXiv:2410.13061 [pdf, other] cs.AI cs.LG math.OC
Optimal Transport for Probabilistic Circuits
Authors: Adrian Ciotinga, YooJung Choi
Abstract: We introduce a novel optimal transport framework for probabilistic circuits (PCs). While it has recently been shown that divergences between distributions represented as certain classes of PCs can be computed tractably, to the best of our knowledge there is no existing approach to compute the Wasserstein distance between probability distributions given by PCs. We consider a Wasserstein-type distance that restricts the coupling measure of the associated optimal transport problem to be a probabilistic circuit. We then develop an algorithm for computing this distance by solving a series of small linear programs, and derive the circuit conditions under which this is tractable. Furthermore, we show that we can also retrieve the optimal transport plan between the PCs from the solutions to these linear programs. We then consider the empirical Wasserstein distance between a PC and a dataset, and show that we can estimate the PC parameters to minimize this distance through an efficient iterative algorithm.
Submitted 16 October, 2024; originally announced October 2024.
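
For orientation, here is the basic optimal-transport linear program on two small discrete marginals, solved with scipy; the paper's contribution is making this tractable for distributions given by circuits, which this generic sketch does not capture.

    import numpy as np
    from scipy.optimize import linprog

    p = np.array([0.5, 0.3, 0.2])        # source marginal
    q = np.array([0.4, 0.4, 0.2])        # target marginal
    C = np.abs(np.subtract.outer(np.arange(3.0), np.arange(3.0)))  # |i-j| cost

    # Minimize <C, P> over couplings P >= 0 with row sums p and column sums q.
    n = len(p)
    A_eq = np.zeros((2 * n, n * n))
    for i in range(n):
        A_eq[i, i * n:(i + 1) * n] = 1.0  # row-sum constraints
        A_eq[n + i, i::n] = 1.0           # column-sum constraints
    res = linprog(C.ravel(), A_eq=A_eq, b_eq=np.concatenate([p, q]),
                  bounds=(0, None), method="highs")
    print(res.fun)                        # 1-Wasserstein distance: 0.1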

arXiv:2410.11163 [pdf, other] cs.CL
Model Swarms: Collaborative Search to Adapt LLM Experts via Swarm Intelligence
Authors: Shangbin Feng, Zifeng Wang, Yike Wang, Sayna Ebrahimi, Hamid Palangi, Lesly Miculicich, Achin Kulshrestha, Nathalie Rauschmayr, Yejin Choi, Yulia Tsvetkov, Chen-Yu Lee, Tomas Pfister
Abstract: We propose Model Swarms, a collaborative search algorithm to adapt LLMs via swarm intelligence, the collective behavior guiding individual systems. Specifically, Model Swarms starts with a pool of LLM experts and a utility function. Guided by the best-found checkpoints across models, the diverse LLM experts collaboratively move in weight space to optimize the utility function, which represents the model adaptation objective. Compared to existing model composition approaches, Model Swarms offers tuning-free model adaptation, works in low-data regimes with as few as 200 examples, and does not require assumptions about the specific experts in the swarm or how they should be composed. Extensive experiments demonstrate that Model Swarms can flexibly adapt LLM experts to a single task, multi-task domains, reward models, and diverse human interests, improving over 12 model composition baselines by up to 21.0% across tasks and contexts. Further analysis reveals that LLM experts discover previously unseen capabilities in their initial checkpoints and that Model Swarms enables a weak-to-strong transition of experts through the collaborative search process.
Submitted 14 October, 2024; originally announced October 2024.
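
A minimal particle-swarm-style search over a "weight space", assuming a toy quadratic utility; Model Swarms applies this kind of best-checkpoint-guided search to full LLM expert checkpoints, which this sketch only gestures at.

    import numpy as np

    rng = np.random.default_rng(0)
    def utility(w):                      # toy stand-in for an adaptation score
        return -np.sum((w - 1.0) ** 2)

    experts = rng.normal(size=(5, 8))    # 5 "experts", 8-dim weight vectors
    velocity = np.zeros_like(experts)
    personal_best = experts.copy()
    global_best = max(experts, key=utility).copy()

    for _ in range(100):
        r1, r2 = rng.random(2)
        velocity = (0.7 * velocity
                    + 1.5 * r1 * (personal_best - experts)
                    + 1.5 * r2 * (global_best - experts))
        experts += velocity
        for i, w in enumerate(experts):  # track best-found checkpoints
            if utility(w) > utility(personal_best[i]):
                personal_best[i] = w.copy()
            if utility(w) > utility(global_best):
                global_best = w.copy()

    print(utility(global_best))          # approaches 0 as the swarm converges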

arXiv:2410.08731 [pdf, other] cs.CL cs.AI
Developing a Pragmatic Benchmark for Assessing Korean Legal Language Understanding in Large Language Models
Authors: Yeeun Kim, Young Rok Choi, Eunkyung Choi, Jinhwan Choi, Hai Jin Park, Wonseok Hwang
Abstract: Large language models (LLMs) have demonstrated remarkable performance in the legal domain, with GPT-4 even passing the Uniform Bar Exam in the U.S. However, their efficacy remains limited for non-standardized tasks and for tasks in languages other than English. This underscores the need for careful evaluation of LLMs within each legal system before application. Here, we introduce KBL, a benchmark for assessing the Korean legal language understanding of LLMs, consisting of (1) 7 legal knowledge tasks (510 examples), (2) 4 legal reasoning tasks (288 examples), and (3) the Korean bar exam (4 domains, 53 tasks, 2,510 examples). The first two datasets were developed in close collaboration with lawyers, in order to evaluate LLMs on practical scenarios in a certified manner. Furthermore, considering legal practitioners' frequent use of extensive legal documents for research, we assess LLMs both in a closed-book setting, where they rely solely on internal knowledge, and in a retrieval-augmented generation (RAG) setting, using a corpus of Korean statutes and precedents. The results indicate substantial room and opportunities for improvement.
Submitted 11 October, 2024; originally announced October 2024.
Comments: EMNLP 2024 Findings
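
A sketch of the two evaluation settings just described, with a hypothetical two-document corpus and a toy lexical retriever standing in for the benchmark's corpus of Korean statutes and precedents.

    corpus = [
        "Statute: a declaration of intention made by mistake may be voidable ...",
        "Precedent: ...",
    ]

    def retrieve(question, k=1):
        # Toy retriever: rank passages by word overlap with the question.
        score = lambda doc: len(set(question.lower().split())
                                & set(doc.lower().split()))
        return sorted(corpus, key=score, reverse=True)[:k]

    def build_prompt(question, rag=False):
        if not rag:                      # closed book: internal knowledge only
            return f"Q: {question}\nA:"
        context = "\n".join(retrieve(question))
        return f"Context:\n{context}\n\nQ: {question}\nA:"

    print(build_prompt("Which rule governs declarations made by mistake?",
                       rag=True))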
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08731v1-abstract-full').style.display = 'none'; document.getElementById('2410.08731v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06415">arXiv:2410.06415</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.06415">pdf</a>, <a href="https://arxiv.org/format/2410.06415">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Biased AI can Influence Political Decision-Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fisher%2C+J">Jillian Fisher</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+S">Shangbin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Aron%2C+R">Robert Aron</a>, <a href="/search/cs?searchtype=author&amp;query=Richardson%2C+T">Thomas Richardson</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Fisher%2C+D+W">Daniel W. Fisher</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+J">Jennifer Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvetkov%2C+Y">Yulia Tsvetkov</a>, <a href="/search/cs?searchtype=author&amp;query=Reinecke%2C+K">Katharina Reinecke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06415v2-abstract-short" style="display: inline;"> As modern AI models become integral to everyday tasks, concerns about their inherent biases and their potential impact on human decision-making have emerged. While bias in models are well-documented, less is known about how these biases influence human decisions. This paper presents two interactive experiments investigating the effects of partisan bias in AI language models on political decision-m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06415v2-abstract-full').style.display = 'inline'; document.getElementById('2410.06415v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06415v2-abstract-full" style="display: none;"> As modern AI models become integral to everyday tasks, concerns about their inherent biases and their potential impact on human decision-making have emerged. While bias in models are well-documented, less is known about how these biases influence human decisions. This paper presents two interactive experiments investigating the effects of partisan bias in AI language models on political decision-making. 

arXiv:2410.05829 [pdf, other] cs.RO
A GPT-based Decision Transformer for Multi-Vehicle Coordination at Unsignalized Intersections
Authors: Eunjae Lee, Minhee Kang, Yoojin Choi, Heejin Ahn
Abstract: In this paper, we explore the application of the Decision Transformer, a decision-making algorithm based on the Generative Pre-trained Transformer (GPT) architecture, to multi-vehicle coordination at unsignalized intersections.
We formulate the coordination problem as finding optimal trajectories for multiple vehicles at intersections, modeling it as a sequence prediction task to fully leverage the power of GPTs as sequence models. Through extensive experiments, we compare our approach to a reservation-based intersection management system. Our results show that the Decision Transformer can outperform its training data in terms of total travel time and generalizes effectively to various scenarios, including noise-induced velocity variations, continuous interaction environments, and different vehicle numbers and road configurations.
Submitted 8 October, 2024; originally announced October 2024.
Comments: 7 pages
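
For reference, the generic Decision Transformer input layout that such a sequence-prediction formulation implies: interleaved (return-to-go, state, action) tokens per timestep. The multi-vehicle state and action values below are invented, not the paper's encoding.

    # Build the token sequence a Decision Transformer consumes; at test time
    # the model autoregressively fills in the next action token.
    def to_sequence(returns_to_go, states, actions):
        seq = []
        for g, s, a in zip(returns_to_go, states, actions):
            seq += [("R", g), ("s", s), ("a", a)]
        return seq

    # Toy example: two timesteps, two vehicles' positions as the state,
    # a scalar acceleration as the action.
    print(to_sequence([10.0, 8.5], [(0.0, 3.0), (0.4, 2.2)], [1.0, -0.5]))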
