CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 271 results for author: <span class="mathjax">Lee, L</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Lee%2C+L">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Lee, L"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lee%2C+L&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lee, L"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Lee%2C+L&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17657">arXiv:2502.17657</a> <span> [<a href="https://arxiv.org/pdf/2502.17657">pdf</a>, <a href="https://arxiv.org/format/2502.17657">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> StatLLM: A Dataset for Evaluating the Performance of Large Language Models in Statistical Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyi Song</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lina Lee</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+K">Kexin Xie</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xueying Liu</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+X">Xinwei Deng</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yili Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17657v1-abstract-short" style="display: inline;"> The coding capabilities of large language models (LLMs) have opened up new opportunities for automatic statistical analysis in machine learning and data science. However, before their widespread adoption, it is crucial to assess the accuracy of code generated by LLMs. A major challenge in this evaluation lies in the absence of a benchmark dataset for statistical code (e.g., SAS and R). To fill in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17657v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17657v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17657v1-abstract-full" style="display: none;"> The coding capabilities of large language models (LLMs) have opened up new opportunities for automatic statistical analysis in machine learning and data science. However, before their widespread adoption, it is crucial to assess the accuracy of code generated by LLMs. A major challenge in this evaluation lies in the absence of a benchmark dataset for statistical code (e.g., SAS and R). To fill in this gap, this paper introduces StatLLM, an open-source dataset for evaluating the performance of LLMs in statistical analysis. The StatLLM dataset comprises three key components: statistical analysis tasks, LLM-generated SAS code, and human evaluation scores. The first component includes statistical analysis tasks spanning a variety of analyses and datasets, providing problem descriptions, dataset details, and human-verified SAS code. The second component features SAS code generated by ChatGPT 3.5, ChatGPT 4.0, and Llama 3.1 for those tasks. The third component contains evaluation scores from human experts in assessing the correctness, effectiveness, readability, executability, and output accuracy of the LLM-generated code. We also illustrate the unique potential of the established benchmark dataset for (1) evaluating and enhancing natural language processing metrics, (2) assessing and improving LLM performance in statistical coding, and (3) developing and testing of next-generation statistical software - advancements that are crucial for data science and machine learning research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17657v1-abstract-full').style.display = 'none'; document.getElementById('2502.17657v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16021">arXiv:2502.16021</a> <span> [<a href="https://arxiv.org/pdf/2502.16021">pdf</a>, <a href="https://arxiv.org/ps/2502.16021">ps</a>, <a href="https://arxiv.org/format/2502.16021">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning Neural Networks with Distribution Shift: Efficiently Certifiable Guarantees </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chandrasekaran%2C+G">Gautam Chandrasekaran</a>, <a href="/search/cs?searchtype=author&query=Klivans%2C+A+R">Adam R. Klivans</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L+L">Lin Lin Lee</a>, <a href="/search/cs?searchtype=author&query=Stavropoulos%2C+K">Konstantinos Stavropoulos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16021v1-abstract-short" style="display: inline;"> We give the first provably efficient algorithms for learning neural networks with distribution shift. We work in the Testable Learning with Distribution Shift framework (TDS learning) of Klivans et al. (2024), where the learner receives labeled examples from a training distribution and unlabeled examples from a test distribution and must either output a hypothesis with low test error or reject if… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16021v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16021v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16021v1-abstract-full" style="display: none;"> We give the first provably efficient algorithms for learning neural networks with distribution shift. We work in the Testable Learning with Distribution Shift framework (TDS learning) of Klivans et al. (2024), where the learner receives labeled examples from a training distribution and unlabeled examples from a test distribution and must either output a hypothesis with low test error or reject if distribution shift is detected. No assumptions are made on the test distribution. All prior work in TDS learning focuses on classification, while here we must handle the setting of nonconvex regression. Our results apply to real-valued networks with arbitrary Lipschitz activations and work whenever the training distribution has strictly sub-exponential tails. For training distributions that are bounded and hypercontractive, we give a fully polynomial-time algorithm for TDS learning one hidden-layer networks with sigmoid activations. We achieve this by importing classical kernel methods into the TDS framework using data-dependent feature maps and a type of kernel matrix that couples samples from both train and test distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16021v1-abstract-full').style.display = 'none'; document.getElementById('2502.16021v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in The Thirteenth International Conference on Learning Representations (ICLR 2025) 38 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13117">arXiv:2502.13117</a> <span> [<a href="https://arxiv.org/pdf/2502.13117">pdf</a>, <a href="https://arxiv.org/format/2502.13117">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Performance Evaluation of Large Language Models in Statistical Programming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyi Song</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+K">Kexin Xie</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lina Lee</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+R">Ruizhe Chen</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+M">Jared M. Clark</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Hao He</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Haoran He</a>, <a href="/search/cs?searchtype=author&query=Min%2C+J">Jie Min</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinlei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Simin Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhiyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+X">Xinwei Deng</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yili Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13117v1-abstract-short" style="display: inline;"> The programming capabilities of large language models (LLMs) have revolutionized automatic code generation and opened new avenues for automatic statistical analysis. However, the validity and quality of these generated codes need to be systematically evaluated before they can be widely adopted. Despite their growing prominence, a comprehensive evaluation of statistical code generated by LLMs remai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13117v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13117v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13117v1-abstract-full" style="display: none;"> The programming capabilities of large language models (LLMs) have revolutionized automatic code generation and opened new avenues for automatic statistical analysis. However, the validity and quality of these generated codes need to be systematically evaluated before they can be widely adopted. Despite their growing prominence, a comprehensive evaluation of statistical code generated by LLMs remains scarce in the literature. In this paper, we assess the performance of LLMs, including two versions of ChatGPT and one version of Llama, in the domain of SAS programming for statistical analysis. Our study utilizes a set of statistical analysis tasks encompassing diverse statistical topics and datasets. Each task includes a problem description, dataset information, and human-verified SAS code. We conduct a comprehensive assessment of the quality of SAS code generated by LLMs through human expert evaluation based on correctness, effectiveness, readability, executability, and the accuracy of output results. The analysis of rating scores reveals that while LLMs demonstrate usefulness in generating syntactically correct code, they struggle with tasks requiring deep domain understanding and may produce redundant or incorrect results. This study offers valuable insights into the capabilities and limitations of LLMs in statistical programming, providing guidance for future advancements in AI-assisted coding systems for statistical analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13117v1-abstract-full').style.display = 'none'; document.getElementById('2502.13117v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12944">arXiv:2502.12944</a> <span> [<a href="https://arxiv.org/pdf/2502.12944">pdf</a>, <a href="https://arxiv.org/format/2502.12944">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Performance of Zero-Shot Time Series Foundation Models on Cloud Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Toner%2C+W">William Toner</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+T+L">Thomas L. Lee</a>, <a href="/search/cs?searchtype=author&query=Joosen%2C+A">Artjom Joosen</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+R">Rajkarn Singh</a>, <a href="/search/cs?searchtype=author&query=Asenov%2C+M">Martin Asenov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12944v2-abstract-short" style="display: inline;"> Time series foundation models (FMs) have emerged as a popular paradigm for zero-shot multi-domain forecasting. FMs are trained on numerous diverse datasets and claim to be effective forecasters across multiple different time series domains, including cloud data. In this work we investigate this claim, exploring the effectiveness of FMs on cloud data. We demonstrate that many well-known FMs fail to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12944v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12944v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12944v2-abstract-full" style="display: none;"> Time series foundation models (FMs) have emerged as a popular paradigm for zero-shot multi-domain forecasting. FMs are trained on numerous diverse datasets and claim to be effective forecasters across multiple different time series domains, including cloud data. In this work we investigate this claim, exploring the effectiveness of FMs on cloud data. We demonstrate that many well-known FMs fail to generate meaningful or accurate zero-shot forecasts in this setting. We support this claim empirically, showing that FMs are outperformed consistently by simple linear baselines. We also illustrate a number of interesting pathologies, including instances where FMs suddenly output seemingly erratic, random-looking forecasts. Our results suggest a widespread failure of FMs to model cloud data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12944v2-abstract-full').style.display = 'none'; document.getElementById('2502.12944v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12920">arXiv:2502.12920</a> <span> [<a href="https://arxiv.org/pdf/2502.12920">pdf</a>, <a href="https://arxiv.org/format/2502.12920">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Lightweight Online Adaption for Time Series Foundation Model Forecasts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+T+L">Thomas L. Lee</a>, <a href="/search/cs?searchtype=author&query=Toner%2C+W">William Toner</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+R">Rajkarn Singh</a>, <a href="/search/cs?searchtype=author&query=Joosem%2C+A">Artjom Joosem</a>, <a href="/search/cs?searchtype=author&query=Asenov%2C+M">Martin Asenov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12920v1-abstract-short" style="display: inline;"> Foundation models (FMs) have emerged as a promising approach for time series forecasting. While effective, FMs typically remain fixed during deployment due to the high computational costs of learning them online. Consequently, deployed FMs fail to adapt their forecasts to current data characteristics, despite the availability of online feedback from newly arriving data. This raises the question of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12920v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12920v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12920v1-abstract-full" style="display: none;"> Foundation models (FMs) have emerged as a promising approach for time series forecasting. While effective, FMs typically remain fixed during deployment due to the high computational costs of learning them online. Consequently, deployed FMs fail to adapt their forecasts to current data characteristics, despite the availability of online feedback from newly arriving data. This raises the question of whether FM performance can be enhanced by the efficient usage of this feedback. We propose AdapTS to answer this question. AdapTS is a lightweight mechanism for the online adaption of FM forecasts in response to online feedback. AdapTS consists of two parts: a) the AdapTS-Forecaster which is used to learn the current data distribution; and b) the AdapTS-Weighter which is used to combine the forecasts of the FM and the AdapTS-Forecaster. We evaluate the performance of AdapTS in conjunction with several recent FMs across a suite of standard time series datasets. In all of our experiments we find that using AdapTS improves performance. This work demonstrates how efficient usage of online feedback can be used to improve FM forecasts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12920v1-abstract-full').style.display = 'none'; document.getElementById('2502.12920v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11451">arXiv:2502.11451</a> <span> [<a href="https://arxiv.org/pdf/2502.11451">pdf</a>, <a href="https://arxiv.org/format/2502.11451">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> From Personas to Talks: Revisiting the Impact of Personas on LLM-Synthesized Emotional Support Conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+S">Shenghan Wu</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+Y">Yang Deng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yimo Zhu</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M+L">Mong Li Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11451v1-abstract-short" style="display: inline;"> The rapid advancement of Large Language Models (LLMs) has revolutionized the generation of emotional support conversations (ESC), offering scalable solutions with reduced costs and enhanced data privacy. This paper explores the role of personas in the creation of ESC by LLMs. Our research utilizes established psychological frameworks to measure and infuse persona traits into LLMs, which then gener… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11451v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11451v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11451v1-abstract-full" style="display: none;"> The rapid advancement of Large Language Models (LLMs) has revolutionized the generation of emotional support conversations (ESC), offering scalable solutions with reduced costs and enhanced data privacy. This paper explores the role of personas in the creation of ESC by LLMs. Our research utilizes established psychological frameworks to measure and infuse persona traits into LLMs, which then generate dialogues in the emotional support scenario. We conduct extensive evaluations to understand the stability of persona traits in dialogues, examining shifts in traits post-generation and their impact on dialogue quality and strategy distribution. Experimental results reveal several notable findings: 1) LLMs can infer core persona traits, 2) subtle shifts in emotionality and extraversion occur, influencing the dialogue dynamics, and 3) the application of persona traits modifies the distribution of emotional support strategies, enhancing the relevance and empathetic quality of the responses. These findings highlight the potential of persona-driven LLMs in crafting more personalized, empathetic, and effective emotional support dialogues, which has significant implications for the future design of AI-driven emotional support systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11451v1-abstract-full').style.display = 'none'; document.getElementById('2502.11451v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03660">arXiv:2502.03660</a> <span> [<a href="https://arxiv.org/pdf/2502.03660">pdf</a>, <a href="https://arxiv.org/format/2502.03660">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Energy & Force Regression on DFT Trajectories is Not Enough for Universal Machine Learning Interatomic Potentials </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Miret%2C+S">Santiago Miret</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K+L+K">Kin Long Kelvin Lee</a>, <a href="/search/cs?searchtype=author&query=Gonzales%2C+C">Carmelo Gonzales</a>, <a href="/search/cs?searchtype=author&query=Mannan%2C+S">Sajid Mannan</a>, <a href="/search/cs?searchtype=author&query=Krishnan%2C+N+M+A">N. M. Anoop Krishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03660v1-abstract-short" style="display: inline;"> Universal Machine Learning Interactomic Potentials (MLIPs) enable accelerated simulations for materials discovery. However, current research efforts fail to impactfully utilize MLIPs due to: 1. Overreliance on Density Functional Theory (DFT) for MLIP training data creation; 2. MLIPs' inability to reliably and accurately perform large-scale molecular dynamics (MD) simulations for diverse materials;… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03660v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03660v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03660v1-abstract-full" style="display: none;"> Universal Machine Learning Interactomic Potentials (MLIPs) enable accelerated simulations for materials discovery. However, current research efforts fail to impactfully utilize MLIPs due to: 1. Overreliance on Density Functional Theory (DFT) for MLIP training data creation; 2. MLIPs' inability to reliably and accurately perform large-scale molecular dynamics (MD) simulations for diverse materials; 3. Limited understanding of MLIPs' underlying capabilities. To address these shortcomings, we aargue that MLIP research efforts should prioritize: 1. Employing more accurate simulation methods for large-scale MLIP training data creation (e.g. Coupled Cluster Theory) that cover a wide range of materials design spaces; 2. Creating MLIP metrology tools that leverage large-scale benchmarking, visualization, and interpretability analyses to provide a deeper understanding of MLIPs' inner workings; 3. Developing computationally efficient MLIPs to execute MD simulations that accurately model a broad set of materials properties. Together, these interdisciplinary research directions can help further the real-world application of MLIPs to accurately model complex materials at device scale. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03660v1-abstract-full').style.display = 'none'; document.getElementById('2502.03660v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03638">arXiv:2502.03638</a> <span> [<a href="https://arxiv.org/pdf/2502.03638">pdf</a>, <a href="https://arxiv.org/format/2502.03638">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SymmCD: Symmetry-Preserving Crystal Generation with Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Levy%2C+D">Daniel Levy</a>, <a href="/search/cs?searchtype=author&query=Panigrahi%2C+S+S">Siba Smarak Panigrahi</a>, <a href="/search/cs?searchtype=author&query=Kaba%2C+S">S茅kou-Oumar Kaba</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qiang Zhu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K+L+K">Kin Long Kelvin Lee</a>, <a href="/search/cs?searchtype=author&query=Galkin%2C+M">Mikhail Galkin</a>, <a href="/search/cs?searchtype=author&query=Miret%2C+S">Santiago Miret</a>, <a href="/search/cs?searchtype=author&query=Ravanbakhsh%2C+S">Siamak Ravanbakhsh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03638v1-abstract-short" style="display: inline;"> Generating novel crystalline materials has potential to lead to advancements in fields such as electronics, energy storage, and catalysis. The defining characteristic of crystals is their symmetry, which plays a central role in determining their physical properties. However, existing crystal generation methods either fail to generate materials that display the symmetries of real-world crystals, or… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03638v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03638v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03638v1-abstract-full" style="display: none;"> Generating novel crystalline materials has potential to lead to advancements in fields such as electronics, energy storage, and catalysis. The defining characteristic of crystals is their symmetry, which plays a central role in determining their physical properties. However, existing crystal generation methods either fail to generate materials that display the symmetries of real-world crystals, or simply replicate the symmetry information from examples in a database. To address this limitation, we propose SymmCD, a novel diffusion-based generative model that explicitly incorporates crystallographic symmetry into the generative process. We decompose crystals into two components and learn their joint distribution through diffusion: 1) the asymmetric unit, the smallest subset of the crystal which can generate the whole crystal through symmetry transformations, and; 2) the symmetry transformations needed to be applied to each atom in the asymmetric unit. We also use a novel and interpretable representation for these transformations, enabling generalization across different crystallographic symmetry groups. We showcase the competitive performance of SymmCD on a subset of the Materials Project, obtaining diverse and valid crystals with realistic symmetries and predicted properties. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03638v1-abstract-full').style.display = 'none'; document.getElementById('2502.03638v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17768">arXiv:2501.17768</a> <span> [<a href="https://arxiv.org/pdf/2501.17768">pdf</a>, <a href="https://arxiv.org/format/2501.17768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVCG.2025.3549569">10.1109/TVCG.2025.3549569 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> TeamPortal: Exploring Virtual Reality Collaboration Through Shared and Manipulating Parallel Views </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xian Wang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Luyao Shen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lei Chen</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+M">Mingming Fan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Hang Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17768v1-abstract-short" style="display: inline;"> Virtual Reality (VR) offers a unique collaborative experience, with parallel views playing a pivotal role in Collaborative Virtual Environments by supporting the transfer and delivery of items. Sharing and manipulating partners' views provides users with a broader perspective that helps them identify the targets and partner actions. We proposed TeamPortal accordingly and conducted two user studies… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17768v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17768v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17768v1-abstract-full" style="display: none;"> Virtual Reality (VR) offers a unique collaborative experience, with parallel views playing a pivotal role in Collaborative Virtual Environments by supporting the transfer and delivery of items. Sharing and manipulating partners' views provides users with a broader perspective that helps them identify the targets and partner actions. We proposed TeamPortal accordingly and conducted two user studies with 72 participants (36 pairs) to investigate the potential benefits of interactive, shared perspectives in VR collaboration. Our first study compared ShaView and TeamPortal against a baseline in a collaborative task that encompassed a series of searching and manipulation tasks. The results show that TeamPortal significantly reduced movement and increased collaborative efficiency and social presence in complex tasks. Following the results, the second study evaluated three variants: TeamPortal+, SnapTeamPortal+, and DropTeamPortal+. The results show that both SnapTeamPortal+ and DropTeamPortal+ improved task efficiency and willingness to further adopt these technologies, though SnapTeamPortal+ reduced co-presence. Based on the findings, we proposed three design implications to inform the development of future VR collaboration systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17768v1-abstract-full').style.display = 'none'; document.getElementById('2501.17768v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14728">arXiv:2501.14728</a> <span> [<a href="https://arxiv.org/pdf/2501.14728">pdf</a>, <a href="https://arxiv.org/format/2501.14728">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Mitigating GenAI-powered Evidence Pollution for Out-of-Context Multimodal Misinformation Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Zehong Yan</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+P">Peng Qi</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M+L">Mong Li Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14728v1-abstract-short" style="display: inline;"> While large generative artificial intelligence (GenAI) models have achieved significant success, they also raise growing concerns about online information security due to their potential misuse for generating deceptive content. Out-of-context (OOC) multimodal misinformation detection, which often retrieves Web evidence to identify the repurposing of images in false contexts, faces the issue of rea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14728v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14728v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14728v1-abstract-full" style="display: none;"> While large generative artificial intelligence (GenAI) models have achieved significant success, they also raise growing concerns about online information security due to their potential misuse for generating deceptive content. Out-of-context (OOC) multimodal misinformation detection, which often retrieves Web evidence to identify the repurposing of images in false contexts, faces the issue of reasoning over GenAI-polluted evidence to derive accurate predictions. Existing works simulate GenAI-powered pollution at the claim level with stylistic rewriting to conceal linguistic cues, and ignore evidence-level pollution for such information-seeking applications. In this work, we investigate how polluted evidence affects the performance of existing OOC detectors, revealing a performance degradation of more than 9 percentage points. We propose two strategies, cross-modal evidence reranking and cross-modal claim-evidence reasoning, to address the challenges posed by polluted evidence. Extensive experiments on two benchmark datasets show that these strategies can effectively enhance the robustness of existing out-of-context detectors amidst polluted evidence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14728v1-abstract-full').style.display = 'none'; document.getElementById('2501.14728v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14568">arXiv:2501.14568</a> <span> [<a href="https://arxiv.org/pdf/2501.14568">pdf</a>, <a href="https://arxiv.org/format/2501.14568">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Quantum-Classical Multi-Agent Pathfinding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gerlach%2C+T">Thore Gerlach</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L+K">Loong Kuan Lee</a>, <a href="/search/cs?searchtype=author&query=Barbaresco%2C+F">Fr茅d茅ric Barbaresco</a>, <a href="/search/cs?searchtype=author&query=Piatkowski%2C+N">Nico Piatkowski</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14568v1-abstract-short" style="display: inline;"> Multi-Agent Path Finding (MAPF) focuses on determining conflict-free paths for multiple agents navigating through a shared space to reach specified goal locations. This problem becomes computationally challenging, particularly when handling large numbers of agents, as frequently encountered in practical applications like coordinating autonomous vehicles. Quantum computing (QC) is a promising candi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14568v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14568v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14568v1-abstract-full" style="display: none;"> Multi-Agent Path Finding (MAPF) focuses on determining conflict-free paths for multiple agents navigating through a shared space to reach specified goal locations. This problem becomes computationally challenging, particularly when handling large numbers of agents, as frequently encountered in practical applications like coordinating autonomous vehicles. Quantum computing (QC) is a promising candidate in overcoming such limits. However, current quantum hardware is still in its infancy and thus limited in terms of computing power and error robustness. In this work, we present the first optimal hybrid quantum-classical MAPF algorithm which is based on branch-and-cut-and-prize. QC is integrated by iteratively solving QUBO problems, based on conflict graphs. Experiments on actual quantum hardware and results on benchmark data suggest that our approach dominates previous QUBO formulations and baseline MAPF solvers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14568v1-abstract-full').style.display = 'none'; document.getElementById('2501.14568v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16228">arXiv:2412.16228</a> <span> [<a href="https://arxiv.org/pdf/2412.16228">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/AIxDKE63520.2024.00028">10.1109/AIxDKE63520.2024.00028 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> TAACKIT: Track Annotation and Analytics with Continuous Knowledge Integration Tool </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lily Lee</a>, <a href="/search/cs?searchtype=author&query=Fontes%2C+J">Julian Fontes</a>, <a href="/search/cs?searchtype=author&query=Weinert%2C+A">Andrew Weinert</a>, <a href="/search/cs?searchtype=author&query=Schomacker%2C+L">Laura Schomacker</a>, <a href="/search/cs?searchtype=author&query=Stabile%2C+D">Daniel Stabile</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+J">Jonathan Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16228v1-abstract-short" style="display: inline;"> Machine learning (ML) is a powerful tool for efficiently analyzing data, detecting patterns, and forecasting trends across various domains such as text, audio, and images. The availability of annotation tools to generate reliably annotated data is crucial for advances in ML applications. In the domain of geospatial tracks, the lack of such tools to annotate and validate data impedes rapid and acce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16228v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16228v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16228v1-abstract-full" style="display: none;"> Machine learning (ML) is a powerful tool for efficiently analyzing data, detecting patterns, and forecasting trends across various domains such as text, audio, and images. The availability of annotation tools to generate reliably annotated data is crucial for advances in ML applications. In the domain of geospatial tracks, the lack of such tools to annotate and validate data impedes rapid and accessible ML application development. This paper presents Track Annotation and Analytics with Continuous Knowledge Integration Tool (TAACKIT) to serve the critically important functions of annotating geospatial track data and validating ML models. We demonstrate an ML application use case in the air traffic domain to illustrate its data annotation and model evaluation power and quantify the annotation effort reduction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16228v1-abstract-full').style.display = 'none'; document.getElementById('2412.16228v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> AIxDKE 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12540">arXiv:2412.12540</a> <span> [<a href="https://arxiv.org/pdf/2412.12540">pdf</a>, <a href="https://arxiv.org/format/2412.12540">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Stiefel Flow Matching for Moment-Constrained Structure Elucidation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+A">Austin Cheng</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+A">Alston Lo</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K+L+K">Kin Long Kelvin Lee</a>, <a href="/search/cs?searchtype=author&query=Miret%2C+S">Santiago Miret</a>, <a href="/search/cs?searchtype=author&query=Aspuru-Guzik%2C+A">Al谩n Aspuru-Guzik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12540v2-abstract-short" style="display: inline;"> Molecular structure elucidation is a fundamental step in understanding chemical phenomena, with applications in identifying molecules in natural products, lab syntheses, forensic samples, and the interstellar medium. We consider the task of predicting a molecule's all-atom 3D structure given only its molecular formula and moments of inertia, motivated by the ability of rotational spectroscopy to m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12540v2-abstract-full').style.display = 'inline'; document.getElementById('2412.12540v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12540v2-abstract-full" style="display: none;"> Molecular structure elucidation is a fundamental step in understanding chemical phenomena, with applications in identifying molecules in natural products, lab syntheses, forensic samples, and the interstellar medium. We consider the task of predicting a molecule's all-atom 3D structure given only its molecular formula and moments of inertia, motivated by the ability of rotational spectroscopy to measure these moments. While existing generative models can conditionally sample 3D structures with approximately correct moments, this soft conditioning fails to leverage the many digits of precision afforded by experimental rotational spectroscopy. To address this, we first show that the space of $n$-atom point clouds with a fixed set of moments of inertia is embedded in the Stiefel manifold $\mathrm{St}(n, 4)$. We then propose Stiefel Flow Matching as a generative model for elucidating 3D structure under exact moment constraints. Additionally, we learn simpler and shorter flows by finding approximate solutions for equivariant optimal transport on the Stiefel manifold. Empirically, enforcing exact moment constraints allows Stiefel Flow Matching to achieve higher success rates and faster sampling than Euclidean diffusion models, even on high-dimensional manifolds corresponding to large molecules in the GEOM dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12540v2-abstract-full').style.display = 'none'; document.getElementById('2412.12540v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18623">arXiv:2411.18623</a> <span> [<a href="https://arxiv.org/pdf/2411.18623">pdf</a>, <a href="https://arxiv.org/format/2411.18623">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Lift3D Foundation Policy: Lifting 2D Large-Scale Pretrained Models for Robust 3D Robotic Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jia%2C+Y">Yueru Jia</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaming Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Sixiang Chen</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+C">Chenyang Gu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhilue Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Longzan Luo</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lily Lee</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pengwei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhongyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Renrui Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shanghang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18623v2-abstract-short" style="display: inline;"> 3D geometric information is essential for manipulation tasks, as robots need to perceive the 3D environment, reason about spatial relationships, and interact with intricate spatial configurations. Recent research has increasingly focused on the explicit extraction of 3D features, while still facing challenges such as the lack of large-scale robotic 3D data and the potential loss of spatial geometr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18623v2-abstract-full').style.display = 'inline'; document.getElementById('2411.18623v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18623v2-abstract-full" style="display: none;"> 3D geometric information is essential for manipulation tasks, as robots need to perceive the 3D environment, reason about spatial relationships, and interact with intricate spatial configurations. Recent research has increasingly focused on the explicit extraction of 3D features, while still facing challenges such as the lack of large-scale robotic 3D data and the potential loss of spatial geometry. To address these limitations, we propose the Lift3D framework, which progressively enhances 2D foundation models with implicit and explicit 3D robotic representations to construct a robust 3D manipulation policy. Specifically, we first design a task-aware masked autoencoder that masks task-relevant affordance patches and reconstructs depth information, enhancing the 2D foundation model's implicit 3D robotic representation. After self-supervised fine-tuning, we introduce a 2D model-lifting strategy that establishes a positional mapping between the input 3D points and the positional embeddings of the 2D model. Based on the mapping, Lift3D utilizes the 2D foundation model to directly encode point cloud data, leveraging large-scale pretrained knowledge to construct explicit 3D robotic representations while minimizing spatial information loss. In experiments, Lift3D consistently outperforms previous state-of-the-art methods across several simulation benchmarks and real-world scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18623v2-abstract-full').style.display = 'none'; document.getElementById('2411.18623v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11191">arXiv:2411.11191</a> <span> [<a href="https://arxiv.org/pdf/2411.11191">pdf</a>, <a href="https://arxiv.org/format/2411.11191">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Accelerating Quantum Emitter Characterization with Latent Neural Ordinary Differential Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Proppe%2C+A+H">Andrew H. Proppe</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K+L+K">Kin Long Kelvin Lee</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+W">Weiwei Sun</a>, <a href="/search/cs?searchtype=author&query=Krajewska%2C+C+J">Chantalle J. Krajewska</a>, <a href="/search/cs?searchtype=author&query=Tye%2C+O">Oliver Tye</a>, <a href="/search/cs?searchtype=author&query=Bawendi%2C+M+G">Moungi G. Bawendi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11191v1-abstract-short" style="display: inline;"> Deep neural network models can be used to learn complex dynamics from data and reconstruct sparse or noisy signals, thereby accelerating and augmenting experimental measurements. Evaluating the quantum optical properties of solid-state single-photon emitters is a time-consuming task that typically requires interferometric photon correlation experiments, such as Photon correlation Fourier spectrosc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11191v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11191v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11191v1-abstract-full" style="display: none;"> Deep neural network models can be used to learn complex dynamics from data and reconstruct sparse or noisy signals, thereby accelerating and augmenting experimental measurements. Evaluating the quantum optical properties of solid-state single-photon emitters is a time-consuming task that typically requires interferometric photon correlation experiments, such as Photon correlation Fourier spectroscopy (PCFS) which measures time-resolved single emitter lineshapes. Here, we demonstrate a latent neural ordinary differential equation model that can forecast a complete and noise-free PCFS experiment from a small subset of noisy correlation functions. By encoding measured photon correlations into an initial value problem, the NODE can be propagated to an arbitrary number of interferometer delay times. We demonstrate this with 10 noisy photon correlation functions that are used to extrapolate an entire de-noised interferograms of up to 200 stage positions, enabling up to a 20-fold speedup in experimental acquisition time from $\sim$3 hours to 10 minutes. Our work presents a new approach to greatly accelerate the experimental characterization of novel quantum emitter materials using deep learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11191v1-abstract-full').style.display = 'none'; document.getElementById('2411.11191v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05146">arXiv:2411.05146</a> <span> [<a href="https://arxiv.org/pdf/2411.05146">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Break Times: Virtual Reality Art Therapy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yap%2C+Y+R">Yi Rou Yap</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y+L">Yun Li Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05146v1-abstract-short" style="display: inline;"> This paper presents a Virtual Reality (VR) art therapy known as "Break Times" which aims to enhance students' mental well-being and foster creative expression. The proposed "Break Times" application mimics the art therapy sessions in the VR environment design. Pilot user acceptance test with 10 participants showed a notable reduction in stress levels, with 50% reporting normal stress levels post-i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05146v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05146v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05146v1-abstract-full" style="display: none;"> This paper presents a Virtual Reality (VR) art therapy known as "Break Times" which aims to enhance students' mental well-being and foster creative expression. The proposed "Break Times" application mimics the art therapy sessions in the VR environment design. Pilot user acceptance test with 10 participants showed a notable reduction in stress levels, with 50% reporting normal stress levels post-intervention, compared to 20% pre-intervention. Participants praised the "Break Times" therapy's functionality and engagement features and suggested improvements such as saving creations, incorporating 3D painting, and expanding the artmaking scene variety. The study highlights that VR art therapy has potential as an effective tool for stress management, emphasizing the need for continued refinement to maximize its therapeutic benefits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05146v1-abstract-full').style.display = 'none'; document.getElementById('2411.05146v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Part of proceedings of 6th International Conference AsiaHaptics 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05133">arXiv:2411.05133</a> <span> [<a href="https://arxiv.org/pdf/2411.05133">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Innovative Weight Simulation in Virtual Reality Cube Games: A Pseudo-Haptic Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lim%2C+W+N">Woan Ning Lim</a>, <a href="/search/cs?searchtype=author&query=Leong%2C+E+Y+J">Edric Yi Junn Leong</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y+L">Yun Li Lee</a>, <a href="/search/cs?searchtype=author&query=Yap%2C+K+M">Kian Meng Yap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05133v1-abstract-short" style="display: inline;"> This paper presents an innovative pseudo-haptic model for weight simulation in virtual reality (VR) environments. By integrating visual feedback with voluntary exerted force through a passive haptic glove, the model creates haptic illusions of weight perception. Two VR cube games were developed to evaluate the model's effectiveness. The first game assesses participants' ability to discriminate rel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05133v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05133v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05133v1-abstract-full" style="display: none;"> This paper presents an innovative pseudo-haptic model for weight simulation in virtual reality (VR) environments. By integrating visual feedback with voluntary exerted force through a passive haptic glove, the model creates haptic illusions of weight perception. Two VR cube games were developed to evaluate the model's effectiveness. The first game assesses participants' ability to discriminate relative weights, while the second evaluates their capability to estimate absolute weights. Twelve participants, aged 18 to 59, tested the games. Results suggest that the pseudo-haptic model is effective for relative weight discrimination tasks and holds potential for various VR applications. Further research with a larger participant group and more complex scenarios is recommended to refine and validate the model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05133v1-abstract-full').style.display = 'none'; document.getElementById('2411.05133v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Part of proceedings of 6th International Conference AsiaHaptics 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17632">arXiv:2410.17632</a> <span> [<a href="https://arxiv.org/pdf/2410.17632">pdf</a>, <a href="https://arxiv.org/format/2410.17632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LMLPA: Language Model Linguistic Personality Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jingyao Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xian Wang</a>, <a href="/search/cs?searchtype=author&query=Hosio%2C+S">Simo Hosio</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaoxian Xu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Hang Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17632v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) are increasingly used in everyday life and research. One of the most common use cases is conversational interactions, enabled by the language generation capabilities of LLMs. Just as between two humans, a conversation between an LLM-powered entity and a human depends on the personality of the conversants. However, measuring the personality of a given LLM is currently a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17632v2-abstract-full').style.display = 'inline'; document.getElementById('2410.17632v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17632v2-abstract-full" style="display: none;"> Large Language Models (LLMs) are increasingly used in everyday life and research. One of the most common use cases is conversational interactions, enabled by the language generation capabilities of LLMs. Just as between two humans, a conversation between an LLM-powered entity and a human depends on the personality of the conversants. However, measuring the personality of a given LLM is currently a challenge. This paper introduces the Language Model Linguistic Personality Assessment (LMLPA), a system designed to evaluate the linguistic personalities of LLMs. Our system helps to understand LLMs' language generation capabilities by quantitatively assessing the distinct personality traits reflected in their linguistic outputs. Unlike traditional human-centric psychometrics, the LMLPA adapts a personality assessment questionnaire, specifically the Big Five Inventory, to align with the operational capabilities of LLMs, and also incorporates the findings from previous language-based personality measurement literature. To mitigate sensitivity to the order of options, our questionnaire is designed to be open-ended, resulting in textual answers. Thus, the AI rater is needed to transform ambiguous personality information from text responses into clear numerical indicators of personality traits. Utilising Principal Component Analysis and reliability validations, our findings demonstrate that LLMs possess distinct personality traits that can be effectively quantified by the LMLPA. This research contributes to Human-Computer Interaction and Human-Centered AI, providing a robust framework for future studies to refine AI personality assessments and expand their applications in multiple areas, including education and manufacturing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17632v2-abstract-full').style.display = 'none'; document.getElementById('2410.17632v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14964">arXiv:2410.14964</a> <span> [<a href="https://arxiv.org/pdf/2410.14964">pdf</a>, <a href="https://arxiv.org/format/2410.14964">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ChronoFact: Timeline-based Temporal Fact Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Barik%2C+A+M">Anab Maulana Barik</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M+L">Mong Li Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14964v1-abstract-short" style="display: inline;"> Automated fact verification plays an essential role in fostering trust in the digital space. Despite the growing interest, the verification of temporal facts has not received much attention in the community. Temporal fact verification brings new challenges where cues of the temporal information need to be extracted and temporal reasoning involving various temporal aspects of the text must be appli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14964v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14964v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14964v1-abstract-full" style="display: none;"> Automated fact verification plays an essential role in fostering trust in the digital space. Despite the growing interest, the verification of temporal facts has not received much attention in the community. Temporal fact verification brings new challenges where cues of the temporal information need to be extracted and temporal reasoning involving various temporal aspects of the text must be applied. In this work, we propose an end-to-end solution for temporal fact verification that considers the temporal information in claims to obtain relevant evidence sentences and harness the power of large language model for temporal reasoning. Recognizing that temporal facts often involve events, we model these events in the claim and evidence sentences. We curate two temporal fact datasets to learn time-sensitive representations that encapsulate not only the semantic relationships among the events, but also their chronological proximity. This allows us to retrieve the top-k relevant evidence sentences and provide the context for a large language model to perform temporal reasoning and outputs whether a claim is supported or refuted by the retrieved evidence sentences. Experiment results demonstrate that the proposed approach significantly enhances the accuracy of temporal claim verification, thereby advancing current state-of-the-art in automated fact verification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14964v1-abstract-full').style.display = 'none'; document.getElementById('2410.14964v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08131">arXiv:2410.08131</a> <span> [<a href="https://arxiv.org/pdf/2410.08131">pdf</a>, <a href="https://arxiv.org/format/2410.08131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Deconstructing equivariant representations in molecular systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+K+L+K">Kin Long Kelvin Lee</a>, <a href="/search/cs?searchtype=author&query=Galkin%2C+M">Mikhail Galkin</a>, <a href="/search/cs?searchtype=author&query=Miret%2C+S">Santiago Miret</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08131v1-abstract-short" style="display: inline;"> Recent equivariant models have shown significant progress in not just chemical property prediction, but as surrogates for dynamical simulations of molecules and materials. Many of the top performing models in this category are built within the framework of tensor products, which preserves equivariance by restricting interactions and transformations to those that are allowed by symmetry selection r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08131v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08131v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08131v1-abstract-full" style="display: none;"> Recent equivariant models have shown significant progress in not just chemical property prediction, but as surrogates for dynamical simulations of molecules and materials. Many of the top performing models in this category are built within the framework of tensor products, which preserves equivariance by restricting interactions and transformations to those that are allowed by symmetry selection rules. Despite being a core part of the modeling process, there has not yet been much attention into understanding what information persists in these equivariant representations, and their general behavior outside of benchmark metrics. In this work, we report on a set of experiments using a simple equivariant graph convolution model on the QM9 dataset, focusing on correlating quantitative performance with the resulting molecular graph embeddings. Our key finding is that, for a scalar prediction task, many of the irreducible representations are simply ignored during training -- specifically those pertaining to vector ($l=1$) and tensor quantities ($l=2$) -- an issue that does not necessarily make itself evident in the test metric. We empirically show that removing some unused orders of spherical harmonics improves model performance, correlating with improved latent space structure. We provide a number of recommendations for future experiments to try and improve efficiency and utilization of equivariant features based on these observations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08131v1-abstract-full').style.display = 'none'; document.getElementById('2410.08131v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in the Findings track at the AI4Mat workshop, NeurIPS 2024 Vancouver, BC</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07147">arXiv:2410.07147</a> <span> [<a href="https://arxiv.org/pdf/2410.07147">pdf</a>, <a href="https://arxiv.org/format/2410.07147">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Taking a turn for the better: Conversation redirection throughout the course of mental-health therapy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+V">Vivian Nguyen</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+S+M">Sang Min Jung</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lillian Lee</a>, <a href="/search/cs?searchtype=author&query=Hull%2C+T+D">Thomas D. Hull</a>, <a href="/search/cs?searchtype=author&query=Danescu-Niculescu-Mizil%2C+C">Cristian Danescu-Niculescu-Mizil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07147v1-abstract-short" style="display: inline;"> Mental-health therapy involves a complex conversation flow in which patients and therapists continuously negotiate what should be talked about next. For example, therapists might try to shift the conversation's direction to keep the therapeutic process on track and avoid stagnation, or patients might push the discussion towards issues they want to focus on. How do such patient and therapist redi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07147v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07147v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07147v1-abstract-full" style="display: none;"> Mental-health therapy involves a complex conversation flow in which patients and therapists continuously negotiate what should be talked about next. For example, therapists might try to shift the conversation's direction to keep the therapeutic process on track and avoid stagnation, or patients might push the discussion towards issues they want to focus on. How do such patient and therapist redirections relate to the development and quality of their relationship? To answer this question, we introduce a probabilistic measure of the extent to which a certain utterance immediately redirects the flow of the conversation, accounting for both the intention and the actual realization of such a change. We apply this new measure to characterize the development of patient-therapist relationships over multiple sessions in a very large, widely-used online therapy platform. Our analysis reveals that (1) patient control of the conversation's direction generally increases relative to that of the therapist as their relationship progresses; and (2) patients who have less control in the first few sessions are significantly more likely to eventually express dissatisfaction with their therapist and terminate the relationship. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07147v1-abstract-full').style.display = 'none'; document.getElementById('2410.07147v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in the Proceedings of EMNLP (Findings) 2024. Code available at https://convokit.cornell.edu</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14608">arXiv:2408.14608</a> <span> [<a href="https://arxiv.org/pdf/2408.14608">pdf</a>, <a href="https://arxiv.org/format/2408.14608">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Meta Flow Matching: Integrating Vector Fields on the Wasserstein Manifold </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Atanackovic%2C+L">Lazar Atanackovic</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xi Zhang</a>, <a href="/search/cs?searchtype=author&query=Amos%2C+B">Brandon Amos</a>, <a href="/search/cs?searchtype=author&query=Blanchette%2C+M">Mathieu Blanchette</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L+J">Leo J. Lee</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+A">Alexander Tong</a>, <a href="/search/cs?searchtype=author&query=Neklyudov%2C+K">Kirill Neklyudov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14608v2-abstract-short" style="display: inline;"> Numerous biological and physical processes can be modeled as systems of interacting entities evolving continuously over time, e.g. the dynamics of communicating cells or physical particles. Learning the dynamics of such systems is essential for predicting the temporal evolution of populations across novel samples and unseen environments. Flow-based models allow for learning these dynamics at the p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14608v2-abstract-full').style.display = 'inline'; document.getElementById('2408.14608v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14608v2-abstract-full" style="display: none;"> Numerous biological and physical processes can be modeled as systems of interacting entities evolving continuously over time, e.g. the dynamics of communicating cells or physical particles. Learning the dynamics of such systems is essential for predicting the temporal evolution of populations across novel samples and unseen environments. Flow-based models allow for learning these dynamics at the population level - they model the evolution of the entire distribution of samples. However, current flow-based models are limited to a single initial population and a set of predefined conditions which describe different dynamics. We argue that multiple processes in natural sciences have to be represented as vector fields on the Wasserstein manifold of probability densities. That is, the change of the population at any moment in time depends on the population itself due to the interactions between samples. In particular, this is crucial for personalized medicine where the development of diseases and their respective treatment response depend on the microenvironment of cells specific to each patient. We propose Meta Flow Matching (MFM), a practical approach to integrate along these vector fields on the Wasserstein manifold by amortizing the flow model over the initial populations. Namely, we embed the population of samples using a Graph Neural Network (GNN) and use these embeddings to train a Flow Matching model. This gives MFM the ability to generalize over the initial distributions, unlike previously proposed methods. We demonstrate the ability of MFM to improve the prediction of individual treatment responses on a large-scale multi-patient single-cell drug screen dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14608v2-abstract-full').style.display = 'none'; document.getElementById('2408.14608v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12080">arXiv:2408.12080</a> <span> [<a href="https://arxiv.org/pdf/2408.12080">pdf</a>, <a href="https://arxiv.org/format/2408.12080">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Feasibility of Automated Data Standardization using Large Language Models for Seamless Positioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+M+J+L">Max J. L. Lee</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Ju Lin</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+L">Li-Ta Hsu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12080v1-abstract-short" style="display: inline;"> We propose a feasibility study for real-time automated data standardization leveraging Large Language Models (LLMs) to enhance seamless positioning systems in IoT environments. By integrating and standardizing heterogeneous sensor data from smartphones, IoT devices, and dedicated systems such as Ultra-Wideband (UWB), our study ensures data compatibility and improves positioning accuracy using the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12080v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12080v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12080v1-abstract-full" style="display: none;"> We propose a feasibility study for real-time automated data standardization leveraging Large Language Models (LLMs) to enhance seamless positioning systems in IoT environments. By integrating and standardizing heterogeneous sensor data from smartphones, IoT devices, and dedicated systems such as Ultra-Wideband (UWB), our study ensures data compatibility and improves positioning accuracy using the Extended Kalman Filter (EKF). The core components include the Intelligent Data Standardization Module (IDSM), which employs a fine-tuned LLM to convert varied sensor data into a standardized format, and the Transformation Rule Generation Module (TRGM), which automates the creation of transformation rules and scripts for ongoing data standardization. Evaluated in real-time environments, our study demonstrates adaptability and scalability, enhancing operational efficiency and accuracy in seamless navigation. This study underscores the potential of advanced LLMs in overcoming sensor data integration complexities, paving the way for more scalable and precise IoT navigation solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12080v1-abstract-full').style.display = 'none'; document.getElementById('2408.12080v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IPIN 2024. To be published in IEEE Xplore</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04013">arXiv:2408.04013</a> <span> [<a href="https://arxiv.org/pdf/2408.04013">pdf</a>, <a href="https://arxiv.org/format/2408.04013">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3664647.3681078">10.1145/3664647.3681078 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MetaDragonBoat: Exploring Paddling Techniques of Virtual Dragon Boating in a Metaverse Campus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+W">Wei He</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+S">Shengtian Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuzheng Chen</a>, <a href="/search/cs?searchtype=author&query=Sio%2C+C">Chan-In Sio</a>, <a href="/search/cs?searchtype=author&query=Kan%2C+G+L">Ge Lin Kan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Hang Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04013v1-abstract-short" style="display: inline;"> The preservation of cultural heritage, as mandated by the United Nations Sustainable Development Goals (SDGs), is integral to sustainable urban development. This paper focuses on the Dragon Boat Festival, a prominent event in Chinese cultural heritage, and proposes leveraging Virtual Reality (VR), to enhance its preservation and accessibility. Traditionally, participation in the festival's dragon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04013v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04013v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04013v1-abstract-full" style="display: none;"> The preservation of cultural heritage, as mandated by the United Nations Sustainable Development Goals (SDGs), is integral to sustainable urban development. This paper focuses on the Dragon Boat Festival, a prominent event in Chinese cultural heritage, and proposes leveraging Virtual Reality (VR), to enhance its preservation and accessibility. Traditionally, participation in the festival's dragon boat races was limited to elite athletes, excluding broader demographics. Our proposed solution, named MetaDragonBoat, enables virtual participation in dragon boat racing, offering immersive experiences that replicate physical exertion through a cultural journey. Thus, we build a digital twin of a university campus located in a region with a rich dragon boat racing tradition. Coupled with three paddling techniques that are enabled by either commercial controllers or physical paddle controllers with haptic feedback, diversified users can engage in realistic rowing experiences. Our results demonstrate that by integrating resistance into the paddle controls, users could simulate the physical effort of dragon boat racing, promoting a deeper understanding and appreciation of this cultural heritage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04013v1-abstract-full').style.display = 'none'; document.getElementById('2408.04013v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, accepted at ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16296">arXiv:2407.16296</a> <span> [<a href="https://arxiv.org/pdf/2407.16296">pdf</a>, <a href="https://arxiv.org/format/2407.16296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Quantum Computing for Climate Resilience and Sustainability Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ho%2C+K+T+M">Kin Tung Michael Ho</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kuan-Cheng Chen</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lily Lee</a>, <a href="/search/cs?searchtype=author&query=Burt%2C+F">Felix Burt</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shang Yu</a>, <a href="/search/cs?searchtype=author&query=Po-Heng"> Po-Heng</a>, <a href="/search/cs?searchtype=author&query=Lee"> Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16296v1-abstract-short" style="display: inline;"> The escalating impacts of climate change and the increasing demand for sustainable development and natural resource management necessitate innovative technological solutions. Quantum computing (QC) has emerged as a promising tool with the potential to revolutionize these critical areas. This review explores the application of quantum machine learning and optimization techniques for climate change… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16296v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16296v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16296v1-abstract-full" style="display: none;"> The escalating impacts of climate change and the increasing demand for sustainable development and natural resource management necessitate innovative technological solutions. Quantum computing (QC) has emerged as a promising tool with the potential to revolutionize these critical areas. This review explores the application of quantum machine learning and optimization techniques for climate change prediction and enhancing sustainable development. Traditional computational methods often fall short in handling the scale and complexity of climate models and natural resource management. Quantum advancements, however, offer significant improvements in computational efficiency and problem-solving capabilities. By synthesizing the latest research and developments, this paper highlights how QC and quantum machine learning can optimize multi-infrastructure systems towards climate neutrality. The paper also evaluates the performance of current quantum algorithms and hardware in practical applications and presents realistic cases, i.e., waste-to-energy in anaerobic digestion, disaster prevention in flooding prediction, and new material development for carbon capture. The integration of these quantum technologies promises to drive significant advancements in achieving climate resilience and sustainable development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16296v1-abstract-full').style.display = 'none'; document.getElementById('2407.16296v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15291">arXiv:2407.15291</a> <span> [<a href="https://arxiv.org/pdf/2407.15291">pdf</a>, <a href="https://arxiv.org/format/2407.15291">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Evidence-Based Temporal Fact Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Barik%2C+A+M">Anab Maulana Barik</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M+L">Mong Li Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15291v2-abstract-short" style="display: inline;"> Automated fact verification plays an essential role in fostering trust in the digital space. Despite the growing interest, the verification of temporal facts has not received much attention in the community. Temporal fact verification brings new challenges where cues of the temporal information need to be extracted and temporal reasoning involving various temporal aspects of the text must be appli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15291v2-abstract-full').style.display = 'inline'; document.getElementById('2407.15291v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15291v2-abstract-full" style="display: none;"> Automated fact verification plays an essential role in fostering trust in the digital space. Despite the growing interest, the verification of temporal facts has not received much attention in the community. Temporal fact verification brings new challenges where cues of the temporal information need to be extracted and temporal reasoning involving various temporal aspects of the text must be applied. In this work, we propose an end-to-end solution for temporal fact verification that considers the temporal information in claims to obtain relevant evidence sentences and harness the power of large language model for temporal reasoning. Recognizing that temporal facts often involve events, we model these events in the claim and evidence sentences. We curate two temporal fact datasets to learn time-sensitive representations that encapsulate not only the semantic relationships among the events, but also their chronological proximity. This allows us to retrieve the top-k relevant evidence sentences and provide the context for a large language model to perform temporal reasoning and outputs whether a claim is supported or refuted by the retrieved evidence sentences. Experiment results demonstrate that the proposed approach significantly enhances the accuracy of temporal claim verification, thereby advancing current state-of-the-art in automated fact verification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15291v2-abstract-full').style.display = 'none'; document.getElementById('2407.15291v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00925">arXiv:2407.00925</a> <span> [<a href="https://arxiv.org/pdf/2407.00925">pdf</a>, <a href="https://arxiv.org/format/2407.00925">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> SIDQL: An Efficient Keyframe Extraction and Motion Reconstruction Framework in Motion Capture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuling Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Ziru Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuyang Wang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-hang Lee</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+P">Pan Hui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00925v1-abstract-short" style="display: inline;"> Metaverse, which integrates the virtual and physical worlds, has emerged as an innovative paradigm for changing people's lifestyles. Motion capture has become a reliable approach to achieve seamless synchronization of the movements between avatars and human beings, which plays an important role in diverse Metaverse applications. However, due to the continuous growth of data, current communication… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00925v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00925v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00925v1-abstract-full" style="display: none;"> Metaverse, which integrates the virtual and physical worlds, has emerged as an innovative paradigm for changing people's lifestyles. Motion capture has become a reliable approach to achieve seamless synchronization of the movements between avatars and human beings, which plays an important role in diverse Metaverse applications. However, due to the continuous growth of data, current communication systems face a significant challenge of meeting the demand of ultra-low latency during application. In addition, current methods also have shortcomings when selecting keyframes, e.g., relying on recognizing motion types and artificially selected keyframes. Therefore, the utilization of keyframe extraction and motion reconstruction techniques could be considered a feasible and promising solution. In this work, a new motion reconstruction algorithm is designed in a spherical coordinate system involving location and velocity information. Then, we formalize the keyframe extraction problem into an optimization problem to reduce the reconstruction error. Using Deep Q-Learning (DQL), the Spherical Interpolation based Deep Q-Learning (SIDQL) framework is proposed to generate proper keyframes for reconstructing the motion sequences. We use the CMU database to train and evaluate the framework. Our scheme can significantly reduce the data volume and transmission latency compared to various baselines while maintaining a reconstruction error of less than 0.09 when extracting five keyframes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00925v1-abstract-full').style.display = 'none'; document.getElementById('2407.00925v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15391">arXiv:2406.15391</a> <span> [<a href="https://arxiv.org/pdf/2406.15391">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.2139/ssrn.4807135">10.2139/ssrn.4807135 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Examining the Legal Status of Digital Assets as Property: A Comparative Analysis of Jurisdictional Approaches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+L">Luke Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15391v1-abstract-short" style="display: inline;"> This paper examines the complex legal landscape surrounding digital assets, analysing how they are defined and regulated as property across various jurisdictions. As digital assets such as cryptocurrencies and non-fungible tokens (NFTs) increasingly integrate with global economies, their intangible nature presents unique challenges to traditional property law concepts, necessitating a re-evaluatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15391v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15391v1-abstract-full" style="display: none;"> This paper examines the complex legal landscape surrounding digital assets, analysing how they are defined and regulated as property across various jurisdictions. As digital assets such as cryptocurrencies and non-fungible tokens (NFTs) increasingly integrate with global economies, their intangible nature presents unique challenges to traditional property law concepts, necessitating a re-evaluation of legal definitions and ownership frameworks. This research presents a comparative analysis, reviewing how different legal systems classify and manage digital assets within property law, highlighting the variations in regulatory approaches and their implications on ownership, transfer, and inheritance rights. By examining seminal cases and regulatory developments in major jurisdictions, including the United States, the European Union, and Singapore, this paper explores the emerging trends and potential legal evolutions that could influence the global handling of digital assets. The study aims to contribute to the scholarly discourse by proposing a harmonized approach to digital asset regulation, seeking to balance innovation with legal certainty and consumer protection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15391v1-abstract-full').style.display = 'none'; document.getElementById('2406.15391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11886">arXiv:2406.11886</a> <span> [<a href="https://arxiv.org/pdf/2406.11886">pdf</a>, <a href="https://arxiv.org/format/2406.11886">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> </div> </div> <p class="title is-5 mathjax"> Financial Assets Dependency Prediction Utilizing Spatiotemporal Patterns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Haoren Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+P">Pengfei Zhao</a>, <a href="/search/cs?searchtype=author&query=NG%2C+W+S+H">Wilfred Siu Hung NG</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+D+L">Dik Lun Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11886v1-abstract-short" style="display: inline;"> Financial assets exhibit complex dependency structures, which are crucial for investors to create diversified portfolios to mitigate risk in volatile financial markets. To explore the financial asset dependencies dynamics, we propose a novel approach that models the dependencies of assets as an Asset Dependency Matrix (ADM) and treats the ADM sequences as image sequences. This allows us to leverag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11886v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11886v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11886v1-abstract-full" style="display: none;"> Financial assets exhibit complex dependency structures, which are crucial for investors to create diversified portfolios to mitigate risk in volatile financial markets. To explore the financial asset dependencies dynamics, we propose a novel approach that models the dependencies of assets as an Asset Dependency Matrix (ADM) and treats the ADM sequences as image sequences. This allows us to leverage deep learning-based video prediction methods to capture the spatiotemporal dependencies among assets. However, unlike images where neighboring pixels exhibit explicit spatiotemporal dependencies due to the natural continuity of object movements, assets in ADM do not have a natural order. This poses challenges to organizing the relational assets to reveal better the spatiotemporal dependencies among neighboring assets for ADM forecasting. To tackle the challenges, we propose the Asset Dependency Neural Network (ADNN), which employs the Convolutional Long Short-Term Memory (ConvLSTM) network, a highly successful method for video prediction. ADNN can employ static and dynamic transformation functions to optimize the representations of the ADM. Through extensive experiments, we demonstrate that our proposed framework consistently outperforms the baselines in the ADM prediction and downstream application tasks. This research contributes to understanding and predicting asset dependencies, offering valuable insights for financial market participants. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11886v1-abstract-full').style.display = 'none'; document.getElementById('2406.11886v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18047">arXiv:2405.18047</a> <span> [<a href="https://arxiv.org/pdf/2405.18047">pdf</a>, <a href="https://arxiv.org/format/2405.18047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> 2BP: 2-Stage Backpropagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rae%2C+C">Christopher Rae</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+K+L">Joseph K. L. Lee</a>, <a href="/search/cs?searchtype=author&query=Richings%2C+J">James Richings</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18047v1-abstract-short" style="display: inline;"> As Deep Neural Networks (DNNs) grow in size and complexity, they often exceed the memory capacity of a single accelerator, necessitating the sharding of model parameters across multiple accelerators. Pipeline parallelism is a commonly used sharding strategy for training large DNNs. However, current implementations of pipeline parallelism are being unintentionally bottlenecked by the automatic diff… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18047v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18047v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18047v1-abstract-full" style="display: none;"> As Deep Neural Networks (DNNs) grow in size and complexity, they often exceed the memory capacity of a single accelerator, necessitating the sharding of model parameters across multiple accelerators. Pipeline parallelism is a commonly used sharding strategy for training large DNNs. However, current implementations of pipeline parallelism are being unintentionally bottlenecked by the automatic differentiation tools provided by ML frameworks. This paper introduces 2-stage backpropagation (2BP). By splitting the backward propagation step into two separate stages, we can reduce idle compute time. We tested 2BP on various model architectures and pipelining schedules, achieving increases in throughput in all cases. Using 2BP, we were able to achieve a 1.70x increase in throughput compared to traditional methods when training a LLaMa-like transformer with 7 billion parameters across 4 GPUs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18047v1-abstract-full').style.display = 'none'; document.getElementById('2405.18047v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.08586">arXiv:2405.08586</a> <span> [<a href="https://arxiv.org/pdf/2405.08586">pdf</a>, <a href="https://arxiv.org/format/2405.08586">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.24963/ijcai.2024/127">10.24963/ijcai.2024/127 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Cross-Domain Feature Augmentation for Domain Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yingnan Liu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+Y">Yingtian Zou</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+R">Rui Qiao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fusheng Liu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M+L">Mong Li Lee</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.08586v1-abstract-short" style="display: inline;"> Domain generalization aims to develop models that are robust to distribution shifts. Existing methods focus on learning invariance across domains to enhance model robustness, and data augmentation has been widely used to learn invariant predictors, with most methods performing augmentation in the input space. However, augmentation in the input space has limited diversity whereas in the feature spa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08586v1-abstract-full').style.display = 'inline'; document.getElementById('2405.08586v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.08586v1-abstract-full" style="display: none;"> Domain generalization aims to develop models that are robust to distribution shifts. Existing methods focus on learning invariance across domains to enhance model robustness, and data augmentation has been widely used to learn invariant predictors, with most methods performing augmentation in the input space. However, augmentation in the input space has limited diversity whereas in the feature space is more versatile and has shown promising results. Nonetheless, feature semantics is seldom considered and existing feature augmentation methods suffer from a limited variety of augmented features. We decompose features into class-generic, class-specific, domain-generic, and domain-specific components. We propose a cross-domain feature augmentation method named XDomainMix that enables us to increase sample diversity while emphasizing the learning of invariant representations to achieve domain generalization. Experiments on widely used benchmark datasets demonstrate that our proposed method is able to achieve state-of-the-art performance. Quantitative analysis indicates that our feature augmentation approach facilitates the learning of effective models that are invariant across different domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08586v1-abstract-full').style.display = 'none'; document.getElementById('2405.08586v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the 33rd International Joint Conference on Artificial Intelligence (IJCAI 2024); Code is available at https://github.com/NancyQuris/XDomainMix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14687">arXiv:2404.14687</a> <span> [<a href="https://arxiv.org/pdf/2404.14687">pdf</a>, <a href="https://arxiv.org/format/2404.14687">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pegasus-v1 Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jung%2C+R">Raehyuk Jung</a>, <a href="/search/cs?searchtype=author&query=Go%2C+H">Hyojun Go</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+J">Jaehyuk Yi</a>, <a href="/search/cs?searchtype=author&query=Jang%2C+J">Jiho Jang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daniel Kim</a>, <a href="/search/cs?searchtype=author&query=Suh%2C+J">Jay Suh</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+A">Aiden Lee</a>, <a href="/search/cs?searchtype=author&query=Han%2C+C">Cooper Han</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jae Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jeff Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jin-Young Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Junwan Kim</a>, <a href="/search/cs?searchtype=author&query=Park%2C+K">Kyle Park</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lucas Lee</a>, <a href="/search/cs?searchtype=author&query=Ha%2C+M">Mars Ha</a>, <a href="/search/cs?searchtype=author&query=Seo%2C+M">Minjoon Seo</a>, <a href="/search/cs?searchtype=author&query=Jo%2C+A">Abraham Jo</a>, <a href="/search/cs?searchtype=author&query=Park%2C+E">Ed Park</a>, <a href="/search/cs?searchtype=author&query=Kianinejad%2C+H">Hassan Kianinejad</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">SJ Kim</a>, <a href="/search/cs?searchtype=author&query=Moon%2C+T">Tony Moon</a>, <a href="/search/cs?searchtype=author&query=Jeong%2C+W">Wade Jeong</a>, <a href="/search/cs?searchtype=author&query=Popescu%2C+A">Andrei Popescu</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+E">Esther Kim</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+E">EK Yoon</a> , et al. (19 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.14687v1-abstract-short" style="display: inline;"> This technical report introduces Pegasus-1, a multimodal language model specialized in video content understanding and interaction through natural language. Pegasus-1 is designed to address the unique challenges posed by video data, such as interpreting spatiotemporal information, to offer nuanced video content comprehension across various lengths. This technical report overviews Pegasus-1's archi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14687v1-abstract-full').style.display = 'inline'; document.getElementById('2404.14687v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.14687v1-abstract-full" style="display: none;"> This technical report introduces Pegasus-1, a multimodal language model specialized in video content understanding and interaction through natural language. Pegasus-1 is designed to address the unique challenges posed by video data, such as interpreting spatiotemporal information, to offer nuanced video content comprehension across various lengths. This technical report overviews Pegasus-1's architecture, training strategies, and its performance in benchmarks on video conversation, zero-shot video question answering, and video summarization. We also explore qualitative characteristics of Pegasus-1 , demonstrating its capabilities as well as its limitations, in order to provide readers a balanced view of its current state and its future direction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14687v1-abstract-full').style.display = 'none'; document.getElementById('2404.14687v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.11898">arXiv:2404.11898</a> <span> [<a href="https://arxiv.org/pdf/2404.11898">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Financial Inclusion and Regulatory Challenges: A Critical Analysis of Digital Banks and Alternative Lenders Through Digital Platforms, Machine Learning, and Large Language Models Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+L">Luke Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.11898v1-abstract-short" style="display: inline;"> This paper explores the dual impact of digital banks and alternative lenders on financial inclusion and the regulatory challenges posed by their business models. It discusses the integration of digital platforms, machine learning (ML), and Large Language Models (LLMs) in enhancing financial services accessibility for underserved populations. Through a detailed analysis of operational frameworks an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.11898v1-abstract-full').style.display = 'inline'; document.getElementById('2404.11898v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.11898v1-abstract-full" style="display: none;"> This paper explores the dual impact of digital banks and alternative lenders on financial inclusion and the regulatory challenges posed by their business models. It discusses the integration of digital platforms, machine learning (ML), and Large Language Models (LLMs) in enhancing financial services accessibility for underserved populations. Through a detailed analysis of operational frameworks and technological infrastructures, this research identifies key mechanisms that facilitate broader financial access and mitigate traditional barriers. Additionally, the paper addresses significant regulatory concerns involving data privacy, algorithmic bias, financial stability, and consumer protection. Employing a mixed-methods approach, which combines quantitative financial data analysis with qualitative insights from industry experts, this paper elucidates the complexities of leveraging digital technology to foster financial inclusivity. The findings underscore the necessity of evolving regulatory frameworks that harmonize innovation with comprehensive risk management. This paper concludes with policy recommendations for regulators, financial institutions, and technology providers, aiming to cultivate a more inclusive and stable financial ecosystem through prudent digital technology integration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.11898v1-abstract-full').style.display = 'none'; document.getElementById('2404.11898v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10536">arXiv:2404.10536</a> <span> [<a href="https://arxiv.org/pdf/2404.10536">pdf</a>, <a href="https://arxiv.org/ps/2404.10536">ps</a>, <a href="https://arxiv.org/format/2404.10536">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Machine Learning Applications on Heterogeneous Architecture using Reframe </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rae%2C+C">Christopher Rae</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+K+L">Joseph K. L. Lee</a>, <a href="/search/cs?searchtype=author&query=Richings%2C+J">James Richings</a>, <a href="/search/cs?searchtype=author&query=Weiland%2C+M">Michele Weiland</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10536v2-abstract-short" style="display: inline;"> With the rapid increase in machine learning workloads performed on HPC systems, it is beneficial to regularly perform machine learning specific benchmarks to monitor performance and identify issues. Furthermore, as part of the Edinburgh International Data Facility, EPCC currently hosts a wide range of machine learning accelerators including Nvidia GPUs, the Graphcore Bow Pod64 and Cerebras CS-2, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10536v2-abstract-full').style.display = 'inline'; document.getElementById('2404.10536v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10536v2-abstract-full" style="display: none;"> With the rapid increase in machine learning workloads performed on HPC systems, it is beneficial to regularly perform machine learning specific benchmarks to monitor performance and identify issues. Furthermore, as part of the Edinburgh International Data Facility, EPCC currently hosts a wide range of machine learning accelerators including Nvidia GPUs, the Graphcore Bow Pod64 and Cerebras CS-2, which are managed via Kubernetes and Slurm. We extended the Reframe framework to support the Kubernetes scheduler backend, and utilise Reframe to perform machine learning benchmarks, and we discuss the preliminary results collected and challenges involved in integrating Reframe across multiple platforms and architectures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10536v2-abstract-full').style.display = 'none'; document.getElementById('2404.10536v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Author accepted version of paper in the PERMAVOST workshop at the 33rd International Symposium on High-Performance Parallel and Distributed Computing (HPDC 24)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06466">arXiv:2404.06466</a> <span> [<a href="https://arxiv.org/pdf/2404.06466">pdf</a>, <a href="https://arxiv.org/format/2404.06466">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Hyperparameter Selection in Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+T+L">Thomas L. Lee</a>, <a href="/search/cs?searchtype=author&query=Hellan%2C+S+P">Sigrid Passano Hellan</a>, <a href="/search/cs?searchtype=author&query=Ericsson%2C+L">Linus Ericsson</a>, <a href="/search/cs?searchtype=author&query=Crowley%2C+E+J">Elliot J. Crowley</a>, <a href="/search/cs?searchtype=author&query=Storkey%2C+A">Amos Storkey</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06466v2-abstract-short" style="display: inline;"> In continual learning (CL) -- where a learner trains on a stream of data -- standard hyperparameter optimisation (HPO) cannot be applied, as a learner does not have access to all of the data at the same time. This has prompted the development of CL-specific HPO frameworks. The most popular way to tune hyperparameters in CL is to repeatedly train over the whole data stream with different hyperparam… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06466v2-abstract-full').style.display = 'inline'; document.getElementById('2404.06466v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06466v2-abstract-full" style="display: none;"> In continual learning (CL) -- where a learner trains on a stream of data -- standard hyperparameter optimisation (HPO) cannot be applied, as a learner does not have access to all of the data at the same time. This has prompted the development of CL-specific HPO frameworks. The most popular way to tune hyperparameters in CL is to repeatedly train over the whole data stream with different hyperparameter settings. However, this end-of-training HPO is unusable in practice since a learner can only see the stream once. Hence, there is an open question: what HPO framework should a practitioner use for a CL problem in reality? This paper looks at this question by comparing several realistic HPO frameworks. We find that none of the HPO frameworks considered, including end-of-training HPO, perform consistently better than the rest on popular CL benchmarks. We therefore arrive at a twofold conclusion: a) to be able to discriminate between HPO frameworks there is a need to move beyond the current most commonly used CL benchmarks, and b) on the popular CL benchmarks examined, a CL practitioner should use a realistic HPO framework and can select it based on factors separate from performance, for example compute efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06466v2-abstract-full').style.display = 'none'; document.getElementById('2404.06466v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint, 16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.03575">arXiv:2404.03575</a> <span> [<a href="https://arxiv.org/pdf/2404.03575">pdf</a>, <a href="https://arxiv.org/format/2404.03575">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DreamScene: 3D Gaussian-based Text-to-3D Scene Generation via Formation Pattern Sampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Haoran Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+H">Haolin Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenli Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wenjun Wu</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Y">Yong Liao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lin Wang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-hang Lee</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Pengyuan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.03575v2-abstract-short" style="display: inline;"> Text-to-3D scene generation holds immense potential for the gaming, film, and architecture sectors. Despite significant progress, existing methods struggle with maintaining high quality, consistency, and editing flexibility. In this paper, we propose DreamScene, a 3D Gaussian-based novel text-to-3D scene generation framework, to tackle the aforementioned three challenges mainly via two strategies.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03575v2-abstract-full').style.display = 'inline'; document.getElementById('2404.03575v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.03575v2-abstract-full" style="display: none;"> Text-to-3D scene generation holds immense potential for the gaming, film, and architecture sectors. Despite significant progress, existing methods struggle with maintaining high quality, consistency, and editing flexibility. In this paper, we propose DreamScene, a 3D Gaussian-based novel text-to-3D scene generation framework, to tackle the aforementioned three challenges mainly via two strategies. First, DreamScene employs Formation Pattern Sampling (FPS), a multi-timestep sampling strategy guided by the formation patterns of 3D objects, to form fast, semantically rich, and high-quality representations. FPS uses 3D Gaussian filtering for optimization stability, and leverages reconstruction techniques to generate plausible textures. Second, DreamScene employs a progressive three-stage camera sampling strategy, specifically designed for both indoor and outdoor settings, to effectively ensure object-environment integration and scene-wide 3D consistency. Last, DreamScene enhances scene editing flexibility by integrating objects and environments, enabling targeted adjustments. Extensive experiments validate DreamScene's superiority over current state-of-the-art techniques, heralding its wide-ranging potential for diverse applications. Code and demos will be released at https://dreamscene-project.github.io . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03575v2-abstract-full').style.display = 'none'; document.getElementById('2404.03575v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00874">arXiv:2404.00874</a> <span> [<a href="https://arxiv.org/pdf/2404.00874">pdf</a>, <a href="https://arxiv.org/format/2404.00874">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DiSR-NeRF: Diffusion-Guided View-Consistent Super-Resolution NeRF </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+J+L">Jie Long Lee</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chen Li</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+G+H">Gim Hee Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00874v1-abstract-short" style="display: inline;"> We present DiSR-NeRF, a diffusion-guided framework for view-consistent super-resolution (SR) NeRF. Unlike prior works, we circumvent the requirement for high-resolution (HR) reference images by leveraging existing powerful 2D super-resolution models. Nonetheless, independent SR 2D images are often inconsistent across different views. We thus propose Iterative 3D Synchronization (I3DS) to mitigate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00874v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00874v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00874v1-abstract-full" style="display: none;"> We present DiSR-NeRF, a diffusion-guided framework for view-consistent super-resolution (SR) NeRF. Unlike prior works, we circumvent the requirement for high-resolution (HR) reference images by leveraging existing powerful 2D super-resolution models. Nonetheless, independent SR 2D images are often inconsistent across different views. We thus propose Iterative 3D Synchronization (I3DS) to mitigate the inconsistency problem via the inherent multi-view consistency property of NeRF. Specifically, our I3DS alternates between upscaling low-resolution (LR) rendered images with diffusion models, and updating the underlying 3D representation with standard NeRF training. We further introduce Renoised Score Distillation (RSD), a novel score-distillation objective for 2D image resolution. Our RSD combines features from ancestral sampling and Score Distillation Sampling (SDS) to generate sharp images that are also LR-consistent. Qualitative and quantitative results on both synthetic and real-world datasets demonstrate that our DiSR-NeRF can achieve better results on NeRF super-resolution compared with existing works. Code and video results available at the project website. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00874v1-abstract-full').style.display = 'none'; document.getElementById('2404.00874v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11384">arXiv:2403.11384</a> <span> [<a href="https://arxiv.org/pdf/2403.11384">pdf</a>, <a href="https://arxiv.org/format/2403.11384">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Towards Massive Interaction with Generalist Robotics: A Systematic Review of XR-enabled Remote Human-Robot Interaction Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xian Wang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Luyao Shen</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Hang Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11384v3-abstract-short" style="display: inline;"> The rising interest of generalist robots seek to create robots with versatility to handle multiple tasks in a variety of environments, and human will interact with such robots through immersive interfaces. In the context of human-robot interaction (HRI), this survey provides an exhaustive review of the applications of extended reality (XR) technologies in the field of remote HRI. We developed a sy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11384v3-abstract-full').style.display = 'inline'; document.getElementById('2403.11384v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11384v3-abstract-full" style="display: none;"> The rising interest of generalist robots seek to create robots with versatility to handle multiple tasks in a variety of environments, and human will interact with such robots through immersive interfaces. In the context of human-robot interaction (HRI), this survey provides an exhaustive review of the applications of extended reality (XR) technologies in the field of remote HRI. We developed a systematic search strategy based on the PRISMA methodology. From the initial 2,561 articles selected, 100 research papers that met our inclusion criteria were included. We categorized and summarized the domain in detail, delving into XR technologies, including augmented reality (AR), virtual reality (VR), and mixed reality (MR), and their applications in facilitating intuitive and effective remote control and interaction with robotic systems. The survey highlights existing articles on the application of XR technologies, user experience enhancement, and various interaction designs for XR in remote HRI, providing insights into current trends and future directions. We also identified potential gaps and opportunities for future research to improve remote HRI systems through XR technology to guide and inform future XR and robotics research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11384v3-abstract-full').style.display = 'none'; document.getElementById('2403.11384v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08295">arXiv:2403.08295</a> <span> [<a href="https://arxiv.org/pdf/2403.08295">pdf</a>, <a href="https://arxiv.org/format/2403.08295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemma: Open Models Based on Gemini Research and Technology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemma+Team"> Gemma Team</a>, <a href="/search/cs?searchtype=author&query=Mesnard%2C+T">Thomas Mesnard</a>, <a href="/search/cs?searchtype=author&query=Hardin%2C+C">Cassidy Hardin</a>, <a href="/search/cs?searchtype=author&query=Dadashi%2C+R">Robert Dadashi</a>, <a href="/search/cs?searchtype=author&query=Bhupatiraju%2C+S">Surya Bhupatiraju</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+S">Shreya Pathak</a>, <a href="/search/cs?searchtype=author&query=Sifre%2C+L">Laurent Sifre</a>, <a href="/search/cs?searchtype=author&query=Rivi%C3%A8re%2C+M">Morgane Rivi猫re</a>, <a href="/search/cs?searchtype=author&query=Kale%2C+M+S">Mihir Sanjay Kale</a>, <a href="/search/cs?searchtype=author&query=Love%2C+J">Juliette Love</a>, <a href="/search/cs?searchtype=author&query=Tafti%2C+P">Pouya Tafti</a>, <a href="/search/cs?searchtype=author&query=Hussenot%2C+L">L茅onard Hussenot</a>, <a href="/search/cs?searchtype=author&query=Sessa%2C+P+G">Pier Giuseppe Sessa</a>, <a href="/search/cs?searchtype=author&query=Chowdhery%2C+A">Aakanksha Chowdhery</a>, <a href="/search/cs?searchtype=author&query=Roberts%2C+A">Adam Roberts</a>, <a href="/search/cs?searchtype=author&query=Barua%2C+A">Aditya Barua</a>, <a href="/search/cs?searchtype=author&query=Botev%2C+A">Alex Botev</a>, <a href="/search/cs?searchtype=author&query=Castro-Ros%2C+A">Alex Castro-Ros</a>, <a href="/search/cs?searchtype=author&query=Slone%2C+A">Ambrose Slone</a>, <a href="/search/cs?searchtype=author&query=H%C3%A9liou%2C+A">Am茅lie H茅liou</a>, <a href="/search/cs?searchtype=author&query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&query=Bulanova%2C+A">Anna Bulanova</a>, <a href="/search/cs?searchtype=author&query=Paterson%2C+A">Antonia Paterson</a>, <a href="/search/cs?searchtype=author&query=Tsai%2C+B">Beth Tsai</a>, <a href="/search/cs?searchtype=author&query=Shahriari%2C+B">Bobak Shahriari</a> , et al. (83 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08295v4-abstract-short" style="display: inline;"> This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08295v4-abstract-full').style.display = 'inline'; document.getElementById('2403.08295v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08295v4-abstract-full" style="display: none;"> This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed description of model development. We believe the responsible release of LLMs is critical for improving the safety of frontier models, and for enabling the next wave of LLM innovations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08295v4-abstract-full').style.display = 'none'; document.getElementById('2403.08295v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span> [<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&query=Tanzer%2C+G">Garrett Tanzer</a>, <a href="/search/cs?searchtype=author&query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&query=Love%2C+J">Juliette Love</a> , et al. (1112 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05530v5-abstract-short" style="display: inline;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v5-abstract-full').style.display = 'inline'; document.getElementById('2403.05530v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05530v5-abstract-full" style="display: none;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state-of-the-art in long-document QA, long-video QA and long-context ASR, and match or surpass Gemini 1.0 Ultra's state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5's long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (>99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v5-abstract-full').style.display = 'none'; document.getElementById('2403.05530v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05131">arXiv:2403.05131</a> <span> [<a href="https://arxiv.org/pdf/2403.05131">pdf</a>, <a href="https://arxiv.org/format/2403.05131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Sora as an AGI World Model? A Complete Survey on Text-to-Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cho%2C+J">Joseph Cho</a>, <a href="/search/cs?searchtype=author&query=Puspitasari%2C+F+D">Fachrina Dewi Puspitasari</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Sheng Zheng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jingyao Zheng</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Hang Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+T">Tae-Ho Kim</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+C+S">Choong Seon Hong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chaoning Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05131v2-abstract-short" style="display: inline;"> The evolution of video generation from text, starting with animating MNIST numbers to simulating the physical world with Sora, has progressed at a breakneck speed over the past seven years. While often seen as a superficial expansion of the predecessor text-to-image generation model, text-to-video generation models are developed upon carefully engineered constituents. Here, we systematically discu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05131v2-abstract-full').style.display = 'inline'; document.getElementById('2403.05131v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05131v2-abstract-full" style="display: none;"> The evolution of video generation from text, starting with animating MNIST numbers to simulating the physical world with Sora, has progressed at a breakneck speed over the past seven years. While often seen as a superficial expansion of the predecessor text-to-image generation model, text-to-video generation models are developed upon carefully engineered constituents. Here, we systematically discuss these elements consisting of but not limited to core building blocks (vision, language, and temporal) and supporting features from the perspective of their contributions to achieving a world model. We employ the PRISMA framework to curate 97 impactful research articles from renowned scientific databases primarily studying video synthesis using text conditions. Upon minute exploration of these manuscripts, we observe that text-to-video generation involves more intricate technologies beyond the plain extension of text-to-image generation. Our additional review into the shortcomings of Sora-generated videos pinpoints the call for more in-depth studies in various enabling aspects of video generation such as dataset, evaluation metric, efficient architecture, and human-controlled generation. Finally, we conclude that the study of the text-to-video generation may still be in its infancy, requiring contribution from the cross-discipline research community towards its advancement as the first step to realize artificial general intelligence (AGI). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05131v2-abstract-full').style.display = 'none'; document.getElementById('2403.05131v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">First complete survey on Text-to-Video Generation, 44 pages, 20 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03170">arXiv:2403.03170</a> <span> [<a href="https://arxiv.org/pdf/2403.03170">pdf</a>, <a href="https://arxiv.org/format/2403.03170">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> SNIFFER: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qi%2C+P">Peng Qi</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Zehong Yan</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M+L">Mong Li Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03170v1-abstract-short" style="display: inline;"> Misinformation is a prevalent societal issue due to its potential high risks. Out-of-context (OOC) misinformation, where authentic images are repurposed with false text, is one of the easiest and most effective ways to mislead audiences. Current methods focus on assessing image-text consistency but lack convincing explanations for their judgments, which is essential for debunking misinformation. W… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03170v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03170v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03170v1-abstract-full" style="display: none;"> Misinformation is a prevalent societal issue due to its potential high risks. Out-of-context (OOC) misinformation, where authentic images are repurposed with false text, is one of the easiest and most effective ways to mislead audiences. Current methods focus on assessing image-text consistency but lack convincing explanations for their judgments, which is essential for debunking misinformation. While Multimodal Large Language Models (MLLMs) have rich knowledge and innate capability for visual reasoning and explanation generation, they still lack sophistication in understanding and discovering the subtle crossmodal differences. In this paper, we introduce SNIFFER, a novel multimodal large language model specifically engineered for OOC misinformation detection and explanation. SNIFFER employs two-stage instruction tuning on InstructBLIP. The first stage refines the model's concept alignment of generic objects with news-domain entities and the second stage leverages language-only GPT-4 generated OOC-specific instruction data to fine-tune the model's discriminatory powers. Enhanced by external tools and retrieval, SNIFFER not only detects inconsistencies between text and image but also utilizes external knowledge for contextual verification. Our experiments show that SNIFFER surpasses the original MLLM by over 40% and outperforms state-of-the-art methods in detection accuracy. SNIFFER also provides accurate and persuasive explanations as validated by quantitative and human evaluations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03170v1-abstract-full').style.display = 'none'; document.getElementById('2403.03170v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.06642">arXiv:2402.06642</a> <span> [<a href="https://arxiv.org/pdf/2402.06642">pdf</a>, <a href="https://arxiv.org/format/2402.06642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistical Finance">q-fin.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> From GARCH to Neural Network for Volatility Forecast </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+P">Pengfei Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Haoren Zhu</a>, <a href="/search/cs?searchtype=author&query=NG%2C+W+S+H">Wilfred Siu Hung NG</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+D+L">Dik Lun Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.06642v1-abstract-short" style="display: inline;"> Volatility, as a measure of uncertainty, plays a crucial role in numerous financial activities such as risk management. The Econometrics and Machine Learning communities have developed two distinct approaches for financial volatility forecasting: the stochastic approach and the neural network (NN) approach. Despite their individual strengths, these methodologies have conventionally evolved in sepa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.06642v1-abstract-full').style.display = 'inline'; document.getElementById('2402.06642v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.06642v1-abstract-full" style="display: none;"> Volatility, as a measure of uncertainty, plays a crucial role in numerous financial activities such as risk management. The Econometrics and Machine Learning communities have developed two distinct approaches for financial volatility forecasting: the stochastic approach and the neural network (NN) approach. Despite their individual strengths, these methodologies have conventionally evolved in separate research trajectories with little interaction between them. This study endeavors to bridge this gap by establishing an equivalence relationship between models of the GARCH family and their corresponding NN counterparts. With the equivalence relationship established, we introduce an innovative approach, named GARCH-NN, for constructing NN-based volatility models. It obtains the NN counterparts of GARCH models and integrates them as components into an established NN architecture, thereby seamlessly infusing volatility stylized facts (SFs) inherent in the GARCH models into the neural network. We develop the GARCH-LSTM model to showcase the power of the GARCH-NN approach. Experiment results validate that amalgamating the NN counterparts of the GARCH family models into established NN models leads to enhanced outcomes compared to employing the stochastic and NN models in isolation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.06642v1-abstract-full').style.display = 'none'; document.getElementById('2402.06642v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI'24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.03988">arXiv:2402.03988</a> <span> [<a href="https://arxiv.org/pdf/2402.03988">pdf</a>, <a href="https://arxiv.org/format/2402.03988">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> REBORN: Reinforcement-Learned Boundary Segmentation with Iterative Training for Unsupervised ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tseng%2C+L">Liang-Hsuan Tseng</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+E">En-Pei Hu</a>, <a href="/search/cs?searchtype=author&query=Chiang%2C+C">Cheng-Han Chiang</a>, <a href="/search/cs?searchtype=author&query=Tseng%2C+Y">Yuan Tseng</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lin-shan Lee</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+S">Shao-Hua Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.03988v3-abstract-short" style="display: inline;"> Unsupervised automatic speech recognition (ASR) aims to learn the mapping between the speech signal and its corresponding textual transcription without the supervision of paired speech-text data. A word/phoneme in the speech signal is represented by a segment of speech signal with variable length and unknown boundary, and this segmental structure makes learning the mapping between speech and text… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03988v3-abstract-full').style.display = 'inline'; document.getElementById('2402.03988v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.03988v3-abstract-full" style="display: none;"> Unsupervised automatic speech recognition (ASR) aims to learn the mapping between the speech signal and its corresponding textual transcription without the supervision of paired speech-text data. A word/phoneme in the speech signal is represented by a segment of speech signal with variable length and unknown boundary, and this segmental structure makes learning the mapping between speech and text challenging, especially without paired data. In this paper, we propose REBORN,Reinforcement-Learned Boundary Segmentation with Iterative Training for Unsupervised ASR. REBORN alternates between (1) training a segmentation model that predicts the boundaries of the segmental structures in speech signals and (2) training the phoneme prediction model, whose input is the speech feature segmented by the segmentation model, to predict a phoneme transcription. Since supervised data for training the segmentation model is not available, we use reinforcement learning to train the segmentation model to favor segmentations that yield phoneme sequence predictions with a lower perplexity. We conduct extensive experiments and find that under the same setting, REBORN outperforms all prior unsupervised ASR models on LibriSpeech, TIMIT, and five non-English languages in Multilingual LibriSpeech. We comprehensively analyze why the boundaries learned by REBORN improve the unsupervised ASR performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03988v3-abstract-full').style.display = 'none'; document.getElementById('2402.03988v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01697">arXiv:2402.01697</a> <span> [<a href="https://arxiv.org/pdf/2402.01697">pdf</a>, <a href="https://arxiv.org/format/2402.01697">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3589334.3645642">10.1145/3589334.3645642 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> APT-Pipe: A Prompt-Tuning Tool for Social Data Annotation using ChatGPT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yiming Zhu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Z">Zhizhuo Yin</a>, <a href="/search/cs?searchtype=author&query=Tyson%2C+G">Gareth Tyson</a>, <a href="/search/cs?searchtype=author&query=Haq%2C+E">Ehsan-Ul Haq</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Hang Lee</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+P">Pan Hui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01697v4-abstract-short" style="display: inline;"> Recent research has highlighted the potential of LLM applications, like ChatGPT, for performing label annotation on social computing text. However, it is already well known that performance hinges on the quality of the input prompts. To address this, there has been a flurry of research into prompt tuning -- techniques and guidelines that attempt to improve the quality of prompts. Yet these largely… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01697v4-abstract-full').style.display = 'inline'; document.getElementById('2402.01697v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01697v4-abstract-full" style="display: none;"> Recent research has highlighted the potential of LLM applications, like ChatGPT, for performing label annotation on social computing text. However, it is already well known that performance hinges on the quality of the input prompts. To address this, there has been a flurry of research into prompt tuning -- techniques and guidelines that attempt to improve the quality of prompts. Yet these largely rely on manual effort and prior knowledge of the dataset being annotated. To address this limitation, we propose APT-Pipe, an automated prompt-tuning pipeline. APT-Pipe aims to automatically tune prompts to enhance ChatGPT's text classification performance on any given dataset. We implement APT-Pipe and test it across twelve distinct text classification datasets. We find that prompts tuned by APT-Pipe help ChatGPT achieve higher weighted F1-score on nine out of twelve experimented datasets, with an improvement of 7.01% on average. We further highlight APT-Pipe's flexibility as a framework by showing how it can be extended to support additional tuning mechanisms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01697v4-abstract-full').style.display = 'none'; document.getElementById('2402.01697v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW 2024; Camera-ready version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13463">arXiv:2401.13463</a> <span> [<a href="https://arxiv.org/pdf/2401.13463">pdf</a>, <a href="https://arxiv.org/format/2401.13463">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SpeechDPR: End-to-End Spoken Passage Retrieval for Open-Domain Spoken Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chyi-Jiunn Lin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guan-Ting Lin</a>, <a href="/search/cs?searchtype=author&query=Chuang%2C+Y">Yung-Sung Chuang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wei-Lun Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shang-Wen Li</a>, <a href="/search/cs?searchtype=author&query=Mohamed%2C+A">Abdelrahman Mohamed</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lin-shan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13463v3-abstract-short" style="display: inline;"> Spoken Question Answering (SQA) is essential for machines to reply to user's question by finding the answer span within a given spoken passage. SQA has been previously achieved without ASR to avoid recognition errors and Out-of-Vocabulary (OOV) problems. However, the real-world problem of Open-domain SQA (openSQA), in which the machine needs to first retrieve passages that possibly contain the ans… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13463v3-abstract-full').style.display = 'inline'; document.getElementById('2401.13463v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13463v3-abstract-full" style="display: none;"> Spoken Question Answering (SQA) is essential for machines to reply to user's question by finding the answer span within a given spoken passage. SQA has been previously achieved without ASR to avoid recognition errors and Out-of-Vocabulary (OOV) problems. However, the real-world problem of Open-domain SQA (openSQA), in which the machine needs to first retrieve passages that possibly contain the answer from a spoken archive in addition, was never considered. This paper proposes the first known end-to-end framework, Speech Dense Passage Retriever (SpeechDPR), for the retrieval component of the openSQA problem. SpeechDPR learns a sentence-level semantic representation by distilling knowledge from the cascading model of unsupervised ASR (UASR) and text dense retriever (TDR). No manually transcribed speech data is needed. Initial experiments showed performance comparable to the cascading model of UASR and TDR, and significantly better when UASR was poor, verifying this approach is more robust to speech recognition errors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13463v3-abstract-full').style.display = 'none'; document.getElementById('2401.13463v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.07331">arXiv:2401.07331</a> <span> [<a href="https://arxiv.org/pdf/2401.07331">pdf</a>, <a href="https://arxiv.org/format/2401.07331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.artmed.2024.102995">10.1016/j.artmed.2024.102995 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Rapid Estimation of Left Ventricular Contractility with a Physics-Informed Neural Network Inverse Modeling Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naghavi%2C+E">Ehsan Naghavi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haifeng Wang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lei Fan</a>, <a href="/search/cs?searchtype=author&query=Choy%2C+J+S">Jenny S. Choy</a>, <a href="/search/cs?searchtype=author&query=Kassab%2C+G">Ghassan Kassab</a>, <a href="/search/cs?searchtype=author&query=Baek%2C+S">Seungik Baek</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+L">Lik-Chuan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.07331v1-abstract-short" style="display: inline;"> Physics-based computer models based on numerical solution of the governing equations generally cannot make rapid predictions, which in turn, limits their applications in the clinic. To address this issue, we developed a physics-informed neural network (PINN) model that encodes the physics of a closed-loop blood circulation system embedding a left ventricle (LV). The PINN model is trained to satisf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.07331v1-abstract-full').style.display = 'inline'; document.getElementById('2401.07331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.07331v1-abstract-full" style="display: none;"> Physics-based computer models based on numerical solution of the governing equations generally cannot make rapid predictions, which in turn, limits their applications in the clinic. To address this issue, we developed a physics-informed neural network (PINN) model that encodes the physics of a closed-loop blood circulation system embedding a left ventricle (LV). The PINN model is trained to satisfy a system of ordinary differential equations (ODEs) associated with a lumped parameter description of the circulatory system. The model predictions have a maximum error of less than 5% when compared to those obtained by solving the ODEs numerically. An inverse modeling approach using the PINN model is also developed to rapidly estimate model parameters (in $\sim$ 3 mins) from single-beat LV pressure and volume waveforms. Using synthetic LV pressure and volume waveforms generated by the PINN model with different model parameter values, we show that the inverse modeling approach can recover the corresponding ground truth values, which suggests that the model parameters are unique. The PINN inverse modeling approach is then applied to estimate LV contractility indexed by the end-systolic elastance $E_{es}$ using waveforms acquired from 11 swine models, including waveforms acquired before and after administration of dobutamine (an inotropic agent) in 3 animals. The estimated $E_{es}$ is about 58% to 284% higher for the data associated with dobutamine compared to those without, which implies that this approach can be used to estimate LV contractility using single-beat measurements. The PINN inverse modeling can potentially be used in the clinic to simultaneously estimate LV contractility and other physiological parameters from single-beat measurements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.07331v1-abstract-full').style.display = 'none'; document.getElementById('2401.07331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11805">arXiv:2312.11805</a> <span> [<a href="https://arxiv.org/pdf/2312.11805">pdf</a>, <a href="https://arxiv.org/format/2312.11805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Gemini: A Family of Highly Capable Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&query=Borgeaud%2C+S">Sebastian Borgeaud</a>, <a href="/search/cs?searchtype=author&query=Alayrac%2C+J">Jean-Baptiste Alayrac</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jiahui Yu</a>, <a href="/search/cs?searchtype=author&query=Soricut%2C+R">Radu Soricut</a>, <a href="/search/cs?searchtype=author&query=Schalkwyk%2C+J">Johan Schalkwyk</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&query=Hauth%2C+A">Anja Hauth</a>, <a href="/search/cs?searchtype=author&query=Millican%2C+K">Katie Millican</a>, <a href="/search/cs?searchtype=author&query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&query=Glaese%2C+A">Amelia Glaese</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jilin Chen</a>, <a href="/search/cs?searchtype=author&query=Pitler%2C+E">Emily Pitler</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Molloy%2C+J">James Molloy</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P+R">Paul R. Barham</a>, <a href="/search/cs?searchtype=author&query=Hennigan%2C+T">Tom Hennigan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Benjamin Lee</a> , et al. (1325 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11805v4-abstract-short" style="display: inline;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'inline'; document.getElementById('2312.11805v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11805v4-abstract-full" style="display: none;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'none'; document.getElementById('2312.11805v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.09799">arXiv:2312.09799</a> <span> [<a href="https://arxiv.org/pdf/2312.09799">pdf</a>, <a href="https://arxiv.org/format/2312.09799">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/OJCAS.2023.3344094">10.1109/OJCAS.2023.3344094 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> IQNet: Image Quality Assessment Guided Just Noticeable Difference Prefiltering For Versatile Video Coding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yu-Han Sun</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+C+L">Chiang Lo-Hsuan Lee</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+T">Tian-Sheuan Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.09799v1-abstract-short" style="display: inline;"> Image prefiltering with just noticeable distortion (JND) improves coding efficiency in a visual lossless way by filtering the perceptually redundant information prior to compression. However, real JND cannot be well modeled with inaccurate masking equations in traditional approaches or image-level subject tests in deep learning approaches. Thus, this paper proposes a fine-grained JND prefiltering… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09799v1-abstract-full').style.display = 'inline'; document.getElementById('2312.09799v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.09799v1-abstract-full" style="display: none;"> Image prefiltering with just noticeable distortion (JND) improves coding efficiency in a visual lossless way by filtering the perceptually redundant information prior to compression. However, real JND cannot be well modeled with inaccurate masking equations in traditional approaches or image-level subject tests in deep learning approaches. Thus, this paper proposes a fine-grained JND prefiltering dataset guided by image quality assessment for accurate block-level JND modeling. The dataset is constructed from decoded images to include coding effects and is also perceptually enhanced with block overlap and edge preservation. Furthermore, based on this dataset, we propose a lightweight JND prefiltering network, IQNet, which can be applied directly to different quantization cases with the same model and only needs 3K parameters. The experimental results show that the proposed approach to Versatile Video Coding could yield maximum/average bitrate savings of 41\%/15\% and 53\%/19\% for all-intra and low-delay P configurations, respectively, with negligible subjective quality loss. Our method demonstrates higher perceptual quality and a model size that is an order of magnitude smaller than previous deep learning methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09799v1-abstract-full').style.display = 'none'; document.getElementById('2312.09799v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> in IEEE Open Journal of Circuits and Systems, vol. 5, pp. 17-27, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.08153">arXiv:2312.08153</a> <span> [<a href="https://arxiv.org/pdf/2312.08153">pdf</a>, <a href="https://arxiv.org/format/2312.08153">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> $蟻$-Diffusion: A diffusion-based density estimation framework for computational physics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+M+X">Maxwell X. Cai</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K+L+K">Kin Long Kelvin Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.08153v1-abstract-short" style="display: inline;"> In physics, density $蟻(\cdot)$ is a fundamentally important scalar function to model, since it describes a scalar field or a probability density function that governs a physical process. Modeling $蟻(\cdot)$ typically scales poorly with parameter space, however, and quickly becomes prohibitively difficult and computationally expensive. One promising avenue to bypass this is to leverage the capabili… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.08153v1-abstract-full').style.display = 'inline'; document.getElementById('2312.08153v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.08153v1-abstract-full" style="display: none;"> In physics, density $蟻(\cdot)$ is a fundamentally important scalar function to model, since it describes a scalar field or a probability density function that governs a physical process. Modeling $蟻(\cdot)$ typically scales poorly with parameter space, however, and quickly becomes prohibitively difficult and computationally expensive. One promising avenue to bypass this is to leverage the capabilities of denoising diffusion models often used in high-fidelity image generation to parameterize $蟻(\cdot)$ from existing scientific data, from which new samples can be trivially sampled from. In this paper, we propose $蟻$-Diffusion, an implementation of denoising diffusion probabilistic models for multidimensional density estimation in physics, which is currently in active development and, from our results, performs well on physically motivated 2D and 3D density functions. Moreover, we propose a novel hashing technique that allows $蟻$-Diffusion to be conditioned by arbitrary amounts of physical parameters of interest. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.08153v1-abstract-full').style.display = 'none'; document.getElementById('2312.08153v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures, accepted for publication at the NeurIPS 2023 workshop "Machine Learning and the Physical Sciences"</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Lee%2C+L&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Lee%2C+L&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>