Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–43 of 43 results for author: <span class="mathjax">Nguyen, T M</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Nguyen%2C+T+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Nguyen, T M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Nguyen%2C+T+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Nguyen, T M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14765">arXiv:2411.14765</a> <span> [<a href="https://arxiv.org/pdf/2411.14765">pdf</a>, <a href="https://arxiv.org/format/2411.14765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Attention-based Framework for Fair Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nielsen%2C+S+K">Stefan K. Nielsen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14765v1-abstract-short" style="display: inline;"> Contrastive learning has proven instrumental in learning unbiased representations of data, especially in complex environments characterized by high-cardinality and high-dimensional sensitive information. However, existing approaches within this setting require predefined modelling assumptions of bias-causing interactions that limit the model's ability to learn debiased representations. In this wor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14765v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14765v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14765v1-abstract-full" style="display: none;"> Contrastive learning has proven instrumental in learning unbiased representations of data, especially in complex environments characterized by high-cardinality and high-dimensional sensitive information. However, existing approaches within this setting require predefined modelling assumptions of bias-causing interactions that limit the model's ability to learn debiased representations. In this work, we propose a new method for fair contrastive learning that employs an attention mechanism to model bias-causing interactions, enabling the learning of a fairer and semantically richer embedding space. In particular, our attention mechanism avoids bias-causing samples that confound the model and focuses on bias-reducing samples that help learn semantically meaningful representations. We verify the advantages of our method against existing baselines in fair contrastive learning and show that our approach can significantly boost bias removal from learned representations without compromising downstream accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14765v1-abstract-full').style.display = 'none'; document.getElementById('2411.14765v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04323">arXiv:2411.04323</a> <span> [<a href="https://arxiv.org/pdf/2411.04323">pdf</a>, <a href="https://arxiv.org/format/2411.04323">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Efficient Symmetry-Aware Materials Generation via Hierarchical Generative Flow Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tri Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tawfik%2C+S+A">Sherif Abdulkader Tawfik</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Truyen Tran</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+S">Sunil Gupta</a>, <a href="/search/cs?searchtype=author&query=Rana%2C+S">Santu Rana</a>, <a href="/search/cs?searchtype=author&query=Venkatesh%2C+S">Svetha Venkatesh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04323v1-abstract-short" style="display: inline;"> Discovering new solid-state materials requires rapidly exploring the vast space of crystal structures and locating stable regions. Generating stable materials with desired properties and compositions is extremely difficult as we search for very small isolated pockets in the exponentially many possibilities, considering elements from the periodic table and their 3D arrangements in crystal lattices.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04323v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04323v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04323v1-abstract-full" style="display: none;"> Discovering new solid-state materials requires rapidly exploring the vast space of crystal structures and locating stable regions. Generating stable materials with desired properties and compositions is extremely difficult as we search for very small isolated pockets in the exponentially many possibilities, considering elements from the periodic table and their 3D arrangements in crystal lattices. Materials discovery necessitates both optimized solution structures and diversity in the generated material structures. Existing methods struggle to explore large material spaces and generate diverse samples with desired properties and requirements. We propose the Symmetry-aware Hierarchical Architecture for Flow-based Traversal (SHAFT), a novel generative model employing a hierarchical exploration strategy to efficiently exploit the symmetry of the materials space to generate crystal structures given desired properties. In particular, our model decomposes the exponentially large materials space into a hierarchy of subspaces consisting of symmetric space groups, lattice parameters, and atoms. We demonstrate that SHAFT significantly outperforms state-of-the-art iterative generative methods, such as Generative Flow Networks (GFlowNets) and Crystal Diffusion Variational AutoEncoders (CDVAE), in crystal structure generation tasks, achieving higher validity, diversity, and stability of generated structures optimized for target properties and requirements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04323v1-abstract-full').style.display = 'none'; document.getElementById('2411.04323v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14574">arXiv:2410.14574</a> <span> [<a href="https://arxiv.org/pdf/2410.14574">pdf</a>, <a href="https://arxiv.org/format/2410.14574">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> MomentumSMoE: Integrating Momentum into Sparse Mixture of Experts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Teo%2C+R+S+Y">Rachel S. Y. Teo</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14574v1-abstract-short" style="display: inline;"> Sparse Mixture of Experts (SMoE) has become the key to unlocking unparalleled scalability in deep learning. SMoE has the potential to exponentially increase parameter count while maintaining the efficiency of the model by only activating a small subset of these parameters for a given sample. However, it has been observed that SMoE suffers from unstable training and has difficulty adapting to new d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14574v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14574v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14574v1-abstract-full" style="display: none;"> Sparse Mixture of Experts (SMoE) has become the key to unlocking unparalleled scalability in deep learning. SMoE has the potential to exponentially increase parameter count while maintaining the efficiency of the model by only activating a small subset of these parameters for a given sample. However, it has been observed that SMoE suffers from unstable training and has difficulty adapting to new distributions, leading to the model's lack of robustness to data contamination. To overcome these limitations, we first establish a connection between the dynamics of the expert representations in SMoEs and gradient descent on a multi-objective optimization problem. Leveraging our framework, we then integrate momentum into SMoE and propose a new family of SMoEs named MomentumSMoE. We theoretically prove and numerically demonstrate that MomentumSMoE is more stable and robust than SMoE. In particular, we verify the advantages of MomentumSMoE over SMoE on a variety of practical tasks including ImageNet-1K object recognition and WikiText-103 language modeling. We demonstrate the applicability of MomentumSMoE to many types of SMoE models, including those in the Sparse MoE model for vision (V-MoE) and the Generalist Language Model (GLaM). We also show that other advanced momentum-based optimization methods, such as Adam, can be easily incorporated into the MomentumSMoE framework for designing new SMoE models with even better performance, almost negligible additional computation cost, and simple implementations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14574v1-abstract-full').style.display = 'none'; document.getElementById('2410.14574v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages in the main text. Published at NeurIPS 2024. The code is available at https://github.com/rachtsy/MomentumSMoE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04692">arXiv:2410.04692</a> <span> [<a href="https://arxiv.org/pdf/2410.04692">pdf</a>, <a href="https://arxiv.org/format/2410.04692">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Clifford Algebraic Approach to E(n)-Equivariant High-order Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tran%2C+H">Hoang-Viet Tran</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+T+N">Thieu N. Vo</a>, <a href="/search/cs?searchtype=author&query=Huu%2C+T+T">Tho Tran Huu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04692v1-abstract-short" style="display: inline;"> Designing neural network architectures that can handle data symmetry is crucial. This is especially important for geometric graphs whose properties are equivariance under Euclidean transformations. Current equivariant graph neural networks (EGNNs), particularly those using message passing, have a limitation in expressive power. Recent high-order graph neural networks can overcome this limitation,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04692v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04692v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04692v1-abstract-full" style="display: none;"> Designing neural network architectures that can handle data symmetry is crucial. This is especially important for geometric graphs whose properties are equivariance under Euclidean transformations. Current equivariant graph neural networks (EGNNs), particularly those using message passing, have a limitation in expressive power. Recent high-order graph neural networks can overcome this limitation, yet they lack equivariance properties, representing a notable drawback in certain applications in chemistry and physical sciences. In this paper, we introduce the Clifford Group Equivariant Graph Neural Networks (CG-EGNNs), a novel EGNN that enhances high-order message passing by integrating high-order local structures in the context of Clifford algebras. As a key benefit of using Clifford algebras, CG-EGNN can learn functions that capture equivariance from positional features. By adopting the high-order message passing mechanism, CG-EGNN gains richer information from neighbors, thus improving model performance. Furthermore, we establish the universality property of the $k$-hop message passing framework, showcasing greater expressive power of CG-EGNNs with additional $k$-hop message passing mechanism. We empirically validate that CG-EGNNs outperform previous methods on various benchmarks including n-body, CMU motion capture, and MD17, highlighting their effectiveness in geometric deep learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04692v1-abstract-full').style.display = 'none'; document.getElementById('2410.04692v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04213">arXiv:2410.04213</a> <span> [<a href="https://arxiv.org/pdf/2410.04213">pdf</a>, <a href="https://arxiv.org/ps/2410.04213">ps</a>, <a href="https://arxiv.org/format/2410.04213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Equivariant Polynomial Functional Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vo%2C+T+N">Thieu N. Vo</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+V">Viet-Hoang Tran</a>, <a href="/search/cs?searchtype=author&query=Huu%2C+T+T">Tho Tran Huu</a>, <a href="/search/cs?searchtype=author&query=The%2C+A+N">An Nguyen The</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Thanh Tran</a>, <a href="/search/cs?searchtype=author&query=Nguyen-Nhat%2C+M">Minh-Khoi Nguyen-Nhat</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+D">Duy-Tung Pham</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04213v1-abstract-short" style="display: inline;"> Neural Functional Networks (NFNs) have gained increasing interest due to their wide range of applications, including extracting information from implicit representations of data, editing network weights, and evaluating policies. A key design principle of NFNs is their adherence to the permutation and scaling symmetries inherent in the connectionist structure of the input neural networks. Recent NF… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04213v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04213v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04213v1-abstract-full" style="display: none;"> Neural Functional Networks (NFNs) have gained increasing interest due to their wide range of applications, including extracting information from implicit representations of data, editing network weights, and evaluating policies. A key design principle of NFNs is their adherence to the permutation and scaling symmetries inherent in the connectionist structure of the input neural networks. Recent NFNs have been proposed with permutation and scaling equivariance based on either graph-based message-passing mechanisms or parameter-sharing mechanisms. However, graph-based equivariant NFNs suffer from high memory consumption and long running times. On the other hand, parameter-sharing-based NFNs built upon equivariant linear layers exhibit lower memory consumption and faster running time, yet their expressivity is limited due to the large size of the symmetric group of the input neural networks. The challenge of designing a permutation and scaling equivariant NFN that maintains low memory consumption and running time while preserving expressivity remains unresolved. In this paper, we propose a novel solution with the development of MAGEP-NFN (Monomial mAtrix Group Equivariant Polynomial NFN). Our approach follows the parameter-sharing mechanism but differs from previous works by constructing a nonlinear equivariant layer represented as a polynomial in the input weights. This polynomial formulation enables us to incorporate additional relationships between weights from different input hidden layers, enhancing the model's expressivity while keeping memory consumption and running time low, thereby addressing the aforementioned challenge. We provide empirical evidence demonstrating that MAGEP-NFN achieves competitive performance and efficiency compared to existing baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04213v1-abstract-full').style.display = 'none'; document.getElementById('2410.04213v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04209">arXiv:2410.04209</a> <span> [<a href="https://arxiv.org/pdf/2410.04209">pdf</a>, <a href="https://arxiv.org/format/2410.04209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Equivariant Neural Functional Networks for Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tran%2C+V">Viet-Hoang Tran</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+T+N">Thieu N. Vo</a>, <a href="/search/cs?searchtype=author&query=The%2C+A+N">An Nguyen The</a>, <a href="/search/cs?searchtype=author&query=Huu%2C+T+T">Tho Tran Huu</a>, <a href="/search/cs?searchtype=author&query=Nguyen-Nhat%2C+M">Minh-Khoi Nguyen-Nhat</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Thanh Tran</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+D">Duy-Tung Pham</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04209v1-abstract-short" style="display: inline;"> This paper systematically explores neural functional networks (NFN) for transformer architectures. NFN are specialized neural networks that treat the weights, gradients, or sparsity patterns of a deep neural network (DNN) as input data and have proven valuable for tasks such as learnable optimizers, implicit data representations, and weight editing. While NFN have been extensively developed for ML… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04209v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04209v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04209v1-abstract-full" style="display: none;"> This paper systematically explores neural functional networks (NFN) for transformer architectures. NFN are specialized neural networks that treat the weights, gradients, or sparsity patterns of a deep neural network (DNN) as input data and have proven valuable for tasks such as learnable optimizers, implicit data representations, and weight editing. While NFN have been extensively developed for MLP and CNN, no prior work has addressed their design for transformers, despite the importance of transformers in modern deep learning. This paper aims to address this gap by providing a systematic study of NFN for transformers. We first determine the maximal symmetric group of the weights in a multi-head attention module as well as a necessary and sufficient condition under which two sets of hyperparameters of the multi-head attention module define the same function. We then define the weight space of transformer architectures and its associated group action, which leads to the design principles for NFN in transformers. Based on these, we introduce Transformer-NFN, an NFN that is equivariant under this group action. Additionally, we release a dataset of more than 125,000 Transformers model checkpoints trained on two datasets with two different tasks, providing a benchmark for evaluating Transformer-NFN and encouraging further research on transformer training and performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04209v1-abstract-full').style.display = 'none'; document.getElementById('2410.04209v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03292">arXiv:2410.03292</a> <span> [<a href="https://arxiv.org/pdf/2410.03292">pdf</a>, <a href="https://arxiv.org/format/2410.03292">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Demystifying the Token Dynamics of Deep Selective State Space Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vo%2C+T+N">Thieu N Vo</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T+D">Tung D. Pham</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+X+T">Xin T. Tong</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03292v1-abstract-short" style="display: inline;"> Selective state space models (SSM), such as Mamba, have gained prominence for their effectiveness in modeling sequential data. Despite their outstanding empirical performance, a comprehensive theoretical understanding of deep selective SSM remains elusive, hindering their further development and adoption for applications that need high fidelity. In this paper, we investigate the dynamical properti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03292v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03292v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03292v1-abstract-full" style="display: none;"> Selective state space models (SSM), such as Mamba, have gained prominence for their effectiveness in modeling sequential data. Despite their outstanding empirical performance, a comprehensive theoretical understanding of deep selective SSM remains elusive, hindering their further development and adoption for applications that need high fidelity. In this paper, we investigate the dynamical properties of tokens in a pre-trained Mamba model. In particular, we derive the dynamical system governing the continuous-time limit of the Mamba model and characterize the asymptotic behavior of its solutions. In the one-dimensional case, we prove that only one of the following two scenarios happens: either all tokens converge to zero, or all tokens diverge to infinity. We provide criteria based on model parameters to determine when each scenario occurs. For the convergent scenario, we empirically verify that this scenario negatively impacts the model's performance. For the divergent scenario, we prove that different tokens will diverge to infinity at different rates, thereby contributing unequally to the updates during model training. Based on these investigations, we propose two refinements for the model: excluding the convergent scenario and reordering tokens based on their importance scores, both aimed at improving practical performance. Our experimental results validate these refinements, offering insights into enhancing Mamba's effectiveness in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03292v1-abstract-full').style.display = 'none'; document.getElementById('2410.03292v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11697">arXiv:2409.11697</a> <span> [<a href="https://arxiv.org/pdf/2409.11697">pdf</a>, <a href="https://arxiv.org/format/2409.11697">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Monomial Matrix Group Equivariant Neural Functional Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tran%2C+H+V">Hoang V. Tran</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+T+N">Thieu N. Vo</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T+H">Tho H. Tran</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+A+T">An T. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11697v2-abstract-short" style="display: inline;"> Neural functional networks (NFNs) have recently gained significant attention due to their diverse applications, ranging from predicting network generalization and network editing to classifying implicit neural representation. Previous NFN designs often depend on permutation symmetries in neural networks' weights, which traditionally arise from the unordered arrangement of neurons in hidden layers.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11697v2-abstract-full').style.display = 'inline'; document.getElementById('2409.11697v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11697v2-abstract-full" style="display: none;"> Neural functional networks (NFNs) have recently gained significant attention due to their diverse applications, ranging from predicting network generalization and network editing to classifying implicit neural representation. Previous NFN designs often depend on permutation symmetries in neural networks' weights, which traditionally arise from the unordered arrangement of neurons in hidden layers. However, these designs do not take into account the weight scaling symmetries of $\ReLU$ networks, and the weight sign flipping symmetries of $\sin$ or $\Tanh$ networks. In this paper, we extend the study of the group action on the network weights from the group of permutation matrices to the group of monomial matrices by incorporating scaling/sign-flipping symmetries. Particularly, we encode these scaling/sign-flipping symmetries by designing our corresponding equivariant and invariant layers. We name our new family of NFNs the Monomial Matrix Group Equivariant Neural Functional Networks (Monomial-NFN). Because of the expansion of the symmetries, Monomial-NFN has much fewer independent trainable parameters compared to the baseline NFNs in the literature, thus enhancing the model's efficiency. Moreover, for fully connected and convolutional neural networks, we theoretically prove that all groups that leave these networks invariant while acting on their weight spaces are some subgroups of the monomial matrix group. We provide empirical evidence to demonstrate the advantages of our model over existing baselines, achieving competitive performance and efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11697v2-abstract-full').style.display = 'none'; document.getElementById('2409.11697v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages in the main text. Published at NeurIPS 2024. The code is available at https://github.com/MathematicalAI-NUS/Monomial-NFN</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12480">arXiv:2408.12480</a> <span> [<a href="https://arxiv.org/pdf/2408.12480">pdf</a>, <a href="https://arxiv.org/format/2408.12480">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Doan%2C+K+T">Khang T. Doan</a>, <a href="/search/cs?searchtype=author&query=Huynh%2C+B+G">Bao G. Huynh</a>, <a href="/search/cs?searchtype=author&query=Hoang%2C+D+T">Dung T. Hoang</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T+D">Thuc D. Pham</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+N+H">Nhat H. Pham</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q+T+M">Quan T. M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+B+Q">Bang Q. Vo</a>, <a href="/search/cs?searchtype=author&query=Hoang%2C+S+N">Suong N. Hoang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12480v2-abstract-short" style="display: inline;"> In this report, we introduce Vintern-1B, a reliable 1-billion-parameters multimodal large language model (MLLM) for Vietnamese language tasks. By integrating the Qwen2-0.5B-Instruct language model with the InternViT-300M-448px visual model, Vintern-1B is optimized for a range of applications, including optical character recognition (OCR), document extraction, and general question-answering in Viet… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12480v2-abstract-full').style.display = 'inline'; document.getElementById('2408.12480v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12480v2-abstract-full" style="display: none;"> In this report, we introduce Vintern-1B, a reliable 1-billion-parameters multimodal large language model (MLLM) for Vietnamese language tasks. By integrating the Qwen2-0.5B-Instruct language model with the InternViT-300M-448px visual model, Vintern-1B is optimized for a range of applications, including optical character recognition (OCR), document extraction, and general question-answering in Vietnamese context. The model is fine-tuned on an extensive dataset of over 3 million image-question-answer pairs, achieving robust performance and reliable results across multiple Vietnamese language benchmarks like OpenViVQA and ViTextVQA. Vintern-1B is small enough to fit into various on-device applications easily. Additionally, we have open-sourced several Vietnamese vision question answering (VQA) datasets for text and diagrams, created with Gemini 1.5 Flash. Our models are available at: https://huggingface.co/5CD-AI/Vintern-1B-v2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12480v2-abstract-full').style.display = 'none'; document.getElementById('2408.12480v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13781">arXiv:2406.13781</a> <span> [<a href="https://arxiv.org/pdf/2406.13781">pdf</a>, <a href="https://arxiv.org/format/2406.13781">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Primal-Dual Framework for Transformers and Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tam Nguyen</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+N">Nhat Ho</a>, <a href="/search/cs?searchtype=author&query=Bertozzi%2C+A+L">Andrea L. Bertozzi</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a>, <a href="/search/cs?searchtype=author&query=Osher%2C+S+J">Stanley J. Osher</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13781v1-abstract-short" style="display: inline;"> Self-attention is key to the remarkable success of transformers in sequence modeling tasks including many applications in natural language processing and computer vision. Like neural network layers, these attention mechanisms are often developed by heuristics and experience. To provide a principled framework for constructing attention layers in transformers, we show that the self-attention corresp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13781v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13781v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13781v1-abstract-full" style="display: none;"> Self-attention is key to the remarkable success of transformers in sequence modeling tasks including many applications in natural language processing and computer vision. Like neural network layers, these attention mechanisms are often developed by heuristics and experience. To provide a principled framework for constructing attention layers in transformers, we show that the self-attention corresponds to the support vector expansion derived from a support vector regression problem, whose primal formulation has the form of a neural network layer. Using our framework, we derive popular attention layers used in practice and propose two new attentions: 1) the Batch Normalized Attention (Attention-BN) derived from the batch normalization layer and 2) the Attention with Scaled Head (Attention-SH) derived from using less training data to fit the SVR model. We empirically demonstrate the advantages of the Attention-BN and Attention-SH in reducing head redundancy, increasing the model's accuracy, and improving the model's efficiency in a variety of practical applications including image and time-series classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13781v1-abstract-full').style.display = 'none'; document.getElementById('2406.13781v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICLR 2023, 26 pages, 4 figures, 14 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13770">arXiv:2406.13770</a> <span> [<a href="https://arxiv.org/pdf/2406.13770">pdf</a>, <a href="https://arxiv.org/format/2406.13770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Elliptical Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nielsen%2C+S+K">Stefan K. Nielsen</a>, <a href="/search/cs?searchtype=author&query=Abdullaev%2C+L+U">Laziz U. Abdullaev</a>, <a href="/search/cs?searchtype=author&query=Teo%2C+R+S+Y">Rachel S. Y. Teo</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13770v2-abstract-short" style="display: inline;"> Pairwise dot-product self-attention is key to the success of transformers that achieve state-of-the-art performance across a variety of applications in language and vision. This dot-product self-attention computes attention weights among the input tokens using Euclidean distance, which makes the model prone to representation collapse and vulnerable to contaminated samples. In this paper, we propos… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13770v2-abstract-full').style.display = 'inline'; document.getElementById('2406.13770v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13770v2-abstract-full" style="display: none;"> Pairwise dot-product self-attention is key to the success of transformers that achieve state-of-the-art performance across a variety of applications in language and vision. This dot-product self-attention computes attention weights among the input tokens using Euclidean distance, which makes the model prone to representation collapse and vulnerable to contaminated samples. In this paper, we propose using a Mahalanobis distance metric for computing the attention weights to stretch the underlying feature space in directions of high contextual relevance. In particular, we define a hyper-ellipsoidal neighborhood around each query to increase the attention weights of the tokens lying in the contextually important directions. We term this novel class of attention Elliptical Attention. Our Elliptical Attention provides two benefits: 1) reducing representation collapse and 2) enhancing the model's robustness as Elliptical Attention pays more attention to contextually relevant information rather than focusing on some small subset of informative features. We empirically demonstrate the advantages of Elliptical Attention over the baseline dot-product attention and state-of-the-art attention methods on various practical tasks, including object classification, image segmentation, and language modeling across different data modalities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13770v2-abstract-full').style.display = 'none'; document.getElementById('2406.13770v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages in the main text. Published at NeurIPS 2024. The code is available at https://github.com/stefvk/Elliptical-Attention</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13762">arXiv:2406.13762</a> <span> [<a href="https://arxiv.org/pdf/2406.13762">pdf</a>, <a href="https://arxiv.org/format/2406.13762">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Unveiling the Hidden Structure of Self-Attention via Kernel Principal Component Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Teo%2C+R+S+Y">Rachel S. Y. Teo</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13762v2-abstract-short" style="display: inline;"> The remarkable success of transformers in sequence modeling tasks, spanning various applications in natural language processing and computer vision, is attributed to the critical role of self-attention. Similar to the development of most deep learning models, the construction of these attention mechanisms relies on heuristics and experience. In our work, we derive self-attention from kernel princi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13762v2-abstract-full').style.display = 'inline'; document.getElementById('2406.13762v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13762v2-abstract-full" style="display: none;"> The remarkable success of transformers in sequence modeling tasks, spanning various applications in natural language processing and computer vision, is attributed to the critical role of self-attention. Similar to the development of most deep learning models, the construction of these attention mechanisms relies on heuristics and experience. In our work, we derive self-attention from kernel principal component analysis (kernel PCA) and show that self-attention projects its query vectors onto the principal component axes of its key matrix in a feature space. We then formulate the exact formula for the value matrix in self-attention, theoretically and empirically demonstrating that this value matrix captures the eigenvectors of the Gram matrix of the key vectors in self-attention. Leveraging our kernel PCA framework, we propose Attention with Robust Principal Components (RPC-Attention), a novel class of robust attention that is resilient to data contamination. We empirically demonstrate the advantages of RPC-Attention over softmax attention on the ImageNet-1K object classification, WikiText-103 language modeling, and ADE20K image segmentation task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13762v2-abstract-full').style.display = 'none'; document.getElementById('2406.13762v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages in the main text. Published at NeurIPS 2024. The code is available at https://github.com/rachtsy/KPCA_code</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13725">arXiv:2406.13725</a> <span> [<a href="https://arxiv.org/pdf/2406.13725">pdf</a>, <a href="https://arxiv.org/format/2406.13725">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Tree-Sliced Wasserstein Distance on a System of Lines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tran%2C+V">Viet-Hoang Tran</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T">Trang Pham</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Tho Tran</a>, <a href="/search/cs?searchtype=author&query=Le%2C+T">Tam Le</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13725v1-abstract-short" style="display: inline;"> Sliced Wasserstein (SW) distance in Optimal Transport (OT) is widely used in various applications thanks to its statistical effectiveness and computational efficiency. On the other hand, Tree Wassenstein (TW) and Tree-sliced Wassenstein (TSW) are instances of OT for probability measures where its ground cost is a tree metric. TSW also has a low computational complexity, i.e. linear to the number o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13725v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13725v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13725v1-abstract-full" style="display: none;"> Sliced Wasserstein (SW) distance in Optimal Transport (OT) is widely used in various applications thanks to its statistical effectiveness and computational efficiency. On the other hand, Tree Wassenstein (TW) and Tree-sliced Wassenstein (TSW) are instances of OT for probability measures where its ground cost is a tree metric. TSW also has a low computational complexity, i.e. linear to the number of edges in the tree. Especially, TSW is identical to SW when the tree is a chain. While SW is prone to loss of topological information of input measures due to relying on one-dimensional projection, TSW is more flexible and has a higher degree of freedom by choosing a tree rather than a line to alleviate the curse of dimensionality in SW. However, for practical applications, popular tree metric sampling methods are heavily built upon given supports, which limits their capacity to adapt to new supports. In this paper, we propose the Tree-Sliced Wasserstein distance on a System of Lines (TSW-SL), which brings a connection between SW and TSW. Compared to SW and TSW, our TSW-SL benefits from the higher degree of freedom of TSW while being suitable to dynamic settings as SW. In TSW-SL, we use a variant of the Radon Transform to project measures onto a system of lines, resulting in measures on a space with a tree metric, then leverage TW to efficiently compute distances between them. We empirically verify the advantages of TSW-SL over the traditional SW by conducting a variety of experiments on gradient flows, image style transfer, and generative models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13725v1-abstract-full').style.display = 'none'; document.getElementById('2406.13725v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 6 figures, 2 tables, 4 algorithms</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15989">arXiv:2402.15989</a> <span> [<a href="https://arxiv.org/pdf/2402.15989">pdf</a>, <a href="https://arxiv.org/format/2402.15989">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> PIDformer: Transformer Meets Control Theory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tam Nguyen</a>, <a href="/search/cs?searchtype=author&query=Uribe%2C+C+A">C茅sar A. Uribe</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15989v1-abstract-short" style="display: inline;"> In this work, we address two main shortcomings of transformer architectures: input corruption and rank collapse in their output representation. We unveil self-attention as an autonomous state-space model that inherently promotes smoothness in its solutions, leading to lower-rank outputs and diminished representation capacity. Moreover, the steady-state solution of the model is sensitive to input p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15989v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15989v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15989v1-abstract-full" style="display: none;"> In this work, we address two main shortcomings of transformer architectures: input corruption and rank collapse in their output representation. We unveil self-attention as an autonomous state-space model that inherently promotes smoothness in its solutions, leading to lower-rank outputs and diminished representation capacity. Moreover, the steady-state solution of the model is sensitive to input perturbations. We incorporate a Proportional-Integral-Derivative (PID) closed-loop feedback control system with a reference point into the model to improve robustness and representation capacity. This integration aims to preserve high-frequency details while bolstering model stability, rendering it more noise-resilient. The resulting controlled state-space model is theoretically proven robust and adept at addressing the rank collapse. Motivated by this control framework, we derive a novel class of transformers, PID-controlled Transformer (PIDformer), aimed at improving robustness and mitigating the rank-collapse issue inherent in softmax transformers. We empirically evaluate the model for advantages and robustness against baseline transformers across various practical tasks, including object classification, image segmentation, and language modeling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15989v1-abstract-full').style.display = 'none'; document.getElementById('2402.15989v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11813">arXiv:2402.11813</a> <span> [<a href="https://arxiv.org/pdf/2402.11813">pdf</a>, <a href="https://arxiv.org/format/2402.11813">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A novel framework for adaptive stress testing of autonomous vehicles in multi-lane roads </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Trinh%2C+L">Linh Trinh</a>, <a href="/search/cs?searchtype=author&query=Luu%2C+Q">Quang-Hung Luu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Thai M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Vu%2C+H+L">Hai L. Vu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11813v2-abstract-short" style="display: inline;"> Stress testing is an approach for evaluating the reliability of systems under extreme conditions which help reveal vulnerable scenarios that standard testing may overlook. Identifying such scenarios is of great importance in autonomous vehicles (AV) and other safety-critical systems. Since failure events are rare, naive random search approaches require a large number of vehicle operation hours to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11813v2-abstract-full').style.display = 'inline'; document.getElementById('2402.11813v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11813v2-abstract-full" style="display: none;"> Stress testing is an approach for evaluating the reliability of systems under extreme conditions which help reveal vulnerable scenarios that standard testing may overlook. Identifying such scenarios is of great importance in autonomous vehicles (AV) and other safety-critical systems. Since failure events are rare, naive random search approaches require a large number of vehicle operation hours to identify potential system failures. Adaptive Stress Testing (AST) is a method addressing this constraint by effectively exploring the failure trajectories of AV using a Markov decision process and employs reinforcement learning techniques to identify driving scenarios with high probability of failures. However, existing AST frameworks are able to handle only simple scenarios, such as one vehicle moving longitudinally on a single lane road which is not realistic and has a limited applicability. In this paper, we propose a novel AST framework to systematically explore corner cases of intelligent driving models that can result in safety concerns involving both longitudinal and lateral vehicle's movements. Specially, we develop a new reward function for Deep Reinforcement Learning to guide the AST in identifying crash scenarios based on the collision probability estimate between the AV under test (i.e., the ego vehicle) and the trajectory of other vehicles on the multi-lane roads. To demonstrate the effectiveness of our framework, we tested it with a complex driving model vehicle that can be controlled in both longitudinal and lateral directions. Quantitative and qualitative analyses of our experimental results demonstrate that our framework outperforms the state-of-the-art AST scheme in identifying corner cases with complex driving maneuvers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11813v2-abstract-full').style.display = 'none'; document.getElementById('2402.11813v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08613">arXiv:2401.08613</a> <span> [<a href="https://arxiv.org/pdf/2401.08613">pdf</a>, <a href="https://arxiv.org/format/2401.08613">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Digital Infrastructure for Connected and Automated Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luu%2C+Q">Quang-Hung Luu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Thai M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+N">Nan Zheng</a>, <a href="/search/cs?searchtype=author&query=Vu%2C+H+L">Hai L. Vu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08613v1-abstract-short" style="display: inline;"> Connected and automated vehicles (CAV) are expected to deliver a much safer, more efficient, and eco-friendlier mobility. Being an indispensable component of the future transportation, their key driving features of CAVs include not only the automated functionality but also the cooperative capability. Despite the CAVs themselves are emerging and active research areas, there is a lack of a comprehen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08613v1-abstract-full').style.display = 'inline'; document.getElementById('2401.08613v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08613v1-abstract-full" style="display: none;"> Connected and automated vehicles (CAV) are expected to deliver a much safer, more efficient, and eco-friendlier mobility. Being an indispensable component of the future transportation, their key driving features of CAVs include not only the automated functionality but also the cooperative capability. Despite the CAVs themselves are emerging and active research areas, there is a lack of a comprehensive literature review on the digital infrastructure that enables them. In this paper, we review the requirements and benefits of digital infrastructures for the CAVs including the vehicle built-in, roadside-based, operational and planning infrastructures. We then highlight challenges and opportunities on digital infrastructure research for the CAVs. Our study sheds lights on seamless integration of digital infrastructure for safe operations of CAVs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08613v1-abstract-full').style.display = 'none'; document.getElementById('2401.08613v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 2 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.00751">arXiv:2312.00751</a> <span> [<a href="https://arxiv.org/pdf/2312.00751">pdf</a>, <a href="https://arxiv.org/format/2312.00751">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Over-smoothing in Transformers via Regularized Nonlocal Functionals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tam Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.00751v1-abstract-short" style="display: inline;"> Transformers have achieved remarkable success in a wide range of natural language processing and computer vision applications. However, the representation capacity of a deep transformer model is degraded due to the over-smoothing issue in which the token representations become identical when the model's depth grows. In this work, we show that self-attention layers in transformers minimize a functi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00751v1-abstract-full').style.display = 'inline'; document.getElementById('2312.00751v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.00751v1-abstract-full" style="display: none;"> Transformers have achieved remarkable success in a wide range of natural language processing and computer vision applications. However, the representation capacity of a deep transformer model is degraded due to the over-smoothing issue in which the token representations become identical when the model's depth grows. In this work, we show that self-attention layers in transformers minimize a functional which promotes smoothness, thereby causing token uniformity. We then propose a novel regularizer that penalizes the norm of the difference between the smooth output tokens from self-attention and the input tokens to preserve the fidelity of the tokens. Minimizing the resulting regularized energy functional, we derive the Neural Transformer with a Regularized Nonlocal Functional (NeuTRENO), a novel class of transformer models that can mitigate the over-smoothing issue. We empirically demonstrate the advantages of NeuTRENO over the baseline transformers and state-of-the-art methods in reducing the over-smoothing of token representations on various practical tasks, including object classification, image segmentation, and language modeling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00751v1-abstract-full').style.display = 'none'; document.getElementById('2312.00751v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 papes</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.03260">arXiv:2311.03260</a> <span> [<a href="https://arxiv.org/pdf/2311.03260">pdf</a>, <a href="https://arxiv.org/format/2311.03260">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> From Coupled Oscillators to Graph Neural Networks: Reducing Over-smoothing via a Kuramoto Model-based Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tuan Nguyen</a>, <a href="/search/cs?searchtype=author&query=Honda%2C+H">Hirotada Honda</a>, <a href="/search/cs?searchtype=author&query=Sano%2C+T">Takashi Sano</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+V">Vinh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nakamura%2C+S">Shugo Nakamura</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.03260v2-abstract-short" style="display: inline;"> We propose the Kuramoto Graph Neural Network (KuramotoGNN), a novel class of continuous-depth graph neural networks (GNNs) that employs the Kuramoto model to mitigate the over-smoothing phenomenon, in which node features in GNNs become indistinguishable as the number of layers increases. The Kuramoto model captures the synchronization behavior of non-linear coupled oscillators. Under the view of c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03260v2-abstract-full').style.display = 'inline'; document.getElementById('2311.03260v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.03260v2-abstract-full" style="display: none;"> We propose the Kuramoto Graph Neural Network (KuramotoGNN), a novel class of continuous-depth graph neural networks (GNNs) that employs the Kuramoto model to mitigate the over-smoothing phenomenon, in which node features in GNNs become indistinguishable as the number of layers increases. The Kuramoto model captures the synchronization behavior of non-linear coupled oscillators. Under the view of coupled oscillators, we first show the connection between Kuramoto model and basic GNN and then over-smoothing phenomenon in GNNs can be interpreted as phase synchronization in Kuramoto model. The KuramotoGNN replaces this phase synchronization with frequency synchronization to prevent the node features from converging into each other while allowing the system to reach a stable synchronized state. We experimentally verify the advantages of the KuramotoGNN over the baseline GNNs and existing methods in reducing over-smoothing on various graph deep learning benchmark tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03260v2-abstract-full').style.display = 'none'; document.getElementById('2311.03260v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.03235">arXiv:2311.03235</a> <span> [<a href="https://arxiv.org/pdf/2311.03235">pdf</a>, <a href="https://arxiv.org/format/2311.03235">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> p-Laplacian Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tuan Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tam Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+V">Vinh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.03235v1-abstract-short" style="display: inline;"> $p$-Laplacian regularization, rooted in graph and image signal processing, introduces a parameter $p$ to control the regularization effect on these data. Smaller values of $p$ promote sparsity and interpretability, while larger values encourage smoother solutions. In this paper, we first show that the self-attention mechanism obtains the minimal Laplacian regularization ($p=2… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03235v1-abstract-full').style.display = 'inline'; document.getElementById('2311.03235v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.03235v1-abstract-full" style="display: none;"> $p$-Laplacian regularization, rooted in graph and image signal processing, introduces a parameter $p$ to control the regularization effect on these data. Smaller values of $p$ promote sparsity and interpretability, while larger values encourage smoother solutions. In this paper, we first show that the self-attention mechanism obtains the minimal Laplacian regularization ($p=2$) and encourages the smoothness in the architecture. However, the smoothness is not suitable for the heterophilic structure of self-attention in transformers where attention weights between tokens that are in close proximity and non-close ones are assigned indistinguishably. From that insight, we then propose a novel class of transformers, namely the $p$-Laplacian Transformer (p-LaT), which leverages $p$-Laplacian regularization framework to harness the heterophilic features within self-attention layers. In particular, low $p$ values will effectively assign higher attention weights to tokens that are in close proximity to the current token being processed. We empirically demonstrate the advantages of p-LaT over the baseline transformers on a wide range of benchmark datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03235v1-abstract-full').style.display = 'none'; document.getElementById('2311.03235v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.00317">arXiv:2309.00317</a> <span> [<a href="https://arxiv.org/pdf/2309.00317">pdf</a>, <a href="https://arxiv.org/format/2309.00317">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/DSAA60987.2023.10302627">10.1109/DSAA60987.2023.10302627 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Text-based Approach For Link Prediction on Wikipedia Articles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tran%2C+A+H">Anh Hoang Tran</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tam Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Luu%2C+S+T">Son T. Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.00317v2-abstract-short" style="display: inline;"> This paper present our work in the DSAA 2023 Challenge about Link Prediction for Wikipedia Articles. We use traditional machine learning models with POS tags (part-of-speech tags) features extracted from text to train the classification model for predicting whether two nodes has the link. Then, we use these tags to test on various machine learning models. We obtained the results by F1 score at 0.9… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00317v2-abstract-full').style.display = 'inline'; document.getElementById('2309.00317v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.00317v2-abstract-full" style="display: none;"> This paper present our work in the DSAA 2023 Challenge about Link Prediction for Wikipedia Articles. We use traditional machine learning models with POS tags (part-of-speech tags) features extracted from text to train the classification model for predicting whether two nodes has the link. Then, we use these tags to test on various machine learning models. We obtained the results by F1 score at 0.99999 and got 7th place in the competition. Our source code is publicly available at this link: https://github.com/Tam1032/DSAA2023-Challenge-Link-prediction-DS-UIT_SAT <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00317v2-abstract-full').style.display = 'none'; document.getElementById('2309.00317v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by DSAA 2023 Conference in the DSAA Student Competition Section</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.12522">arXiv:2307.12522</a> <span> [<a href="https://arxiv.org/pdf/2307.12522">pdf</a>, <a href="https://arxiv.org/format/2307.12522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Automated Mapping of Adaptive App GUIs from Phones to TVs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+H">Han Hu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+R">Ruiqi Dong</a>, <a href="/search/cs?searchtype=author&query=Grundy%2C+J">John Grundy</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Thai Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Huaxiao Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chunyang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.12522v2-abstract-short" style="display: inline;"> With the increasing interconnection of smart devices, users often desire to adopt the same app on quite different devices for identical tasks, such as watching the same movies on both their smartphones and TVs. However, the significant differences in screen size, aspect ratio, and interaction styles make it challenging to adapt Graphical User Interfaces (GUIs) across these devices. Although there… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.12522v2-abstract-full').style.display = 'inline'; document.getElementById('2307.12522v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.12522v2-abstract-full" style="display: none;"> With the increasing interconnection of smart devices, users often desire to adopt the same app on quite different devices for identical tasks, such as watching the same movies on both their smartphones and TVs. However, the significant differences in screen size, aspect ratio, and interaction styles make it challenging to adapt Graphical User Interfaces (GUIs) across these devices. Although there are millions of apps available on Google Play, only a few thousand are designed to support smart TV displays. Existing techniques to map a mobile app GUI to a TV either adopt a responsive design, which struggles to bridge the substantial gap between phone and TV or use mirror apps for improved video display, which requires hardware support and extra engineering efforts. Instead of developing another app for supporting TVs, we propose a semi-automated approach to generate corresponding adaptive TV GUIs, given the phone GUIs as the input. Based on our empirical study of GUI pairs for TVs and phones in existing apps, we synthesize a list of rules for grouping and classifying phone GUIs, converting them to TV GUIs, and generating dynamic TV layouts and source code for the TV display. Our tool is not only beneficial to developers but also to GUI designers, who can further customize the generated GUIs for their TV app development. An evaluation and user study demonstrate the accuracy of our generated GUIs and the usefulness of our tool. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.12522v2-abstract-full').style.display = 'none'; document.getElementById('2307.12522v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06620">arXiv:2306.06620</a> <span> [<a href="https://arxiv.org/pdf/2306.06620">pdf</a>, <a href="https://arxiv.org/format/2306.06620">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ARIST: An Effective API Argument Recommendation Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+S">Son Nguyen</a>, <a href="/search/cs?searchtype=author&query=Manh%2C+C+T">Cuong Tran Manh</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+K+T">Kien T. Tran</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Thu-Trang Nguyen</a>, <a href="/search/cs?searchtype=author&query=Ngo%2C+K">Kien-Tuan Ngo</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+H+D">Hieu Dinh Vo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06620v1-abstract-short" style="display: inline;"> Learning and remembering to use APIs are difficult. Several techniques have been proposed to assist developers in using APIs. Most existing techniques focus on recommending the right API methods to call, but very few techniques focus on recommending API arguments. In this paper, we propose ARIST, a novel automated argument recommendation approach which suggests arguments by predicting developers'… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06620v1-abstract-full').style.display = 'inline'; document.getElementById('2306.06620v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06620v1-abstract-full" style="display: none;"> Learning and remembering to use APIs are difficult. Several techniques have been proposed to assist developers in using APIs. Most existing techniques focus on recommending the right API methods to call, but very few techniques focus on recommending API arguments. In this paper, we propose ARIST, a novel automated argument recommendation approach which suggests arguments by predicting developers' expectations when they define and use API methods. To implement this idea in the recommendation process, ARIST combines program analysis (PA), language models (LMs), and several features specialized for the recommendation task which consider the functionality of formal parameters and the positional information of code elements (e.g., variables or method calls) in the given context. In ARIST, the LMs and the recommending features are used to suggest the promising candidates identified by PA. Meanwhile, PA navigates the LMs and the features working on the set of the valid candidates which satisfy syntax, accessibility, and type-compatibility constraints defined by the programming language in use. Our evaluation on a large dataset of real-world projects shows that ARIST improves the state-of-the-art approach by 19% and 18% in top-1 precision and recall for recommending arguments of frequently-used libraries. For general argument recommendation task, i.e., recommending arguments for every method call, ARIST outperforms the baseline approaches by up to 125% top-1 accuracy. Moreover, for newly-encountered projects, ARIST achieves more than 60% top-3 accuracy when evaluating on a larger dataset. For working/maintaining projects, with a personalized LM to capture developers' coding practice, ARIST can productively rank the expected arguments at the top-1 position in 7/10 requests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06620v1-abstract-full').style.display = 'none'; document.getElementById('2306.06620v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.04454">arXiv:2211.04454</a> <span> [<a href="https://arxiv.org/pdf/2211.04454">pdf</a>, <a href="https://arxiv.org/format/2211.04454">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SLATE: A Sequence Labeling Approach for Task Extraction from Free-form Inked Content </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gandhi%2C+A">Apurva Gandhi</a>, <a href="/search/cs?searchtype=author&query=Serrao%2C+R">Ryan Serrao</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+B">Biyi Fang</a>, <a href="/search/cs?searchtype=author&query=Antonius%2C+G">Gilbert Antonius</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+J">Jenna Hong</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tra My Nguyen</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+S">Sheng Yi</a>, <a href="/search/cs?searchtype=author&query=Nosakhare%2C+E">Ehi Nosakhare</a>, <a href="/search/cs?searchtype=author&query=Shaffer%2C+I">Irene Shaffer</a>, <a href="/search/cs?searchtype=author&query=Srinivasan%2C+S">Soundararajan Srinivasan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+V">Vivek Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.04454v2-abstract-short" style="display: inline;"> We present SLATE, a sequence labeling approach for extracting tasks from free-form content such as digitally handwritten (or "inked") notes on a virtual whiteboard. Our approach allows us to create a single, low-latency model to simultaneously perform sentence segmentation and classification of these sentences into task/non-task sentences. SLATE greatly outperforms a baseline two-model (sentence s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04454v2-abstract-full').style.display = 'inline'; document.getElementById('2211.04454v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.04454v2-abstract-full" style="display: none;"> We present SLATE, a sequence labeling approach for extracting tasks from free-form content such as digitally handwritten (or "inked") notes on a virtual whiteboard. Our approach allows us to create a single, low-latency model to simultaneously perform sentence segmentation and classification of these sentences into task/non-task sentences. SLATE greatly outperforms a baseline two-model (sentence segmentation followed by classification model) approach, achieving a task F1 score of 84.4%, a sentence segmentation (boundary similarity) score of 88.4% and three times lower latency compared to the baseline. Furthermore, we provide insights into tackling challenges of performing NLP on the inking domain. We release both our code and dataset for this novel task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04454v2-abstract-full').style.display = 'none'; document.getElementById('2211.04454v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2022 as an Industry Track paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.05794">arXiv:2210.05794</a> <span> [<a href="https://arxiv.org/pdf/2210.05794">pdf</a>, <a href="https://arxiv.org/format/2210.05794">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Designing Robust Transformers using Robust Kernel Density Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+X">Xing Han</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+T">Tongzheng Ren</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+K">Khai Nguyen</a>, <a href="/search/cs?searchtype=author&query=Ghosh%2C+J">Joydeep Ghosh</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+N">Nhat Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.05794v3-abstract-short" style="display: inline;"> Recent advances in Transformer architectures have empowered their empirical success in a variety of tasks across different domains. However, existing works mainly focus on predictive accuracy and computational cost, without considering other practical issues, such as robustness to contaminated samples. Recent work by Nguyen et al., (2022) has shown that the self-attention mechanism, which is the c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.05794v3-abstract-full').style.display = 'inline'; document.getElementById('2210.05794v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.05794v3-abstract-full" style="display: none;"> Recent advances in Transformer architectures have empowered their empirical success in a variety of tasks across different domains. However, existing works mainly focus on predictive accuracy and computational cost, without considering other practical issues, such as robustness to contaminated samples. Recent work by Nguyen et al., (2022) has shown that the self-attention mechanism, which is the center of the Transformer architecture, can be viewed as a non-parametric estimator based on kernel density estimation (KDE). This motivates us to leverage a set of robust kernel density estimation methods for alleviating the issue of data contamination. Specifically, we introduce a series of self-attention mechanisms that can be incorporated into different Transformer architectures and discuss the special properties of each method. We then perform extensive empirical studies on language modeling and image classification tasks. Our methods demonstrate robust performance in multiple scenarios while maintaining competitive results on clean datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.05794v3-abstract-full').style.display = 'none'; document.getElementById('2210.05794v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2023 as a poster; 23 pages, 5 figures, 11 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.07096">arXiv:2202.07096</a> <span> [<a href="https://arxiv.org/pdf/2202.07096">pdf</a>, <a href="https://arxiv.org/format/2202.07096">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning to Discover Medicines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tri Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Thin Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Truyen Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.07096v1-abstract-short" style="display: inline;"> Discovering new medicines is the hallmark of human endeavor to live a better and longer life. Yet the pace of discovery has slowed down as we need to venture into more wildly unexplored biomedical space to find one that matches today's high standard. Modern AI-enabled by powerful computing, large biomedical databases, and breakthroughs in deep learning-offers a new hope to break this loop as AI is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07096v1-abstract-full').style.display = 'inline'; document.getElementById('2202.07096v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.07096v1-abstract-full" style="display: none;"> Discovering new medicines is the hallmark of human endeavor to live a better and longer life. Yet the pace of discovery has slowed down as we need to venture into more wildly unexplored biomedical space to find one that matches today's high standard. Modern AI-enabled by powerful computing, large biomedical databases, and breakthroughs in deep learning-offers a new hope to break this loop as AI is rapidly maturing, ready to make a huge impact in the area. In this paper we review recent advances in AI methodologies that aim to crack this challenge. We organize the vast and rapidly growing literature of AI for drug discovery into three relatively stable sub-areas: (a) representation learning over molecular sequences and geometric graphs; (b) data-driven reasoning where we predict molecular properties and their binding, optimize existing compounds, generate de novo molecules, and plan the synthesis of target molecules; and (c) knowledge-based reasoning where we discuss the construction and reasoning over biomedical knowledge graphs. We will also identify open challenges and chart possible research directions for the years to come. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07096v1-abstract-full').style.display = 'none'; document.getElementById('2202.07096v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.01195">arXiv:2202.01195</a> <span> [<a href="https://arxiv.org/pdf/2202.01195">pdf</a>, <a href="https://arxiv.org/format/2202.01195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mitigating cold start problems in drug-target affinity prediction with interaction knowledge transferring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tri Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Thin Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Truyen Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.01195v1-abstract-short" style="display: inline;"> Motivation: Predicting the drug-target interaction is crucial for drug discovery as well as drug repurposing. Machine learning is commonly used in drug-target affinity (DTA) problem. However, machine learning model faces the cold-start problem where the model performance drops when predicting the interaction of a novel drug or target. Previous works try to solve the cold start problem by learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.01195v1-abstract-full').style.display = 'inline'; document.getElementById('2202.01195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.01195v1-abstract-full" style="display: none;"> Motivation: Predicting the drug-target interaction is crucial for drug discovery as well as drug repurposing. Machine learning is commonly used in drug-target affinity (DTA) problem. However, machine learning model faces the cold-start problem where the model performance drops when predicting the interaction of a novel drug or target. Previous works try to solve the cold start problem by learning the drug or target representation using unsupervised learning. While the drug or target representation can be learned in an unsupervised manner, it still lacks the interaction information, which is critical in drug-target interaction. Results: To incorporate the interaction information into the drug and protein interaction, we proposed using transfer learning from chemical-chemical interaction (CCI) and protein-protein interaction (PPI) task to drug-target interaction task. The representation learned by CCI and PPI tasks can be transferred smoothly to the DTA task due to the similar nature of the tasks. The result on the drug-target affinity datasets shows that our proposed method has advantages compared to other pretraining methods in the DTA task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.01195v1-abstract-full').style.display = 'none'; document.getElementById('2202.01195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.08678">arXiv:2110.08678</a> <span> [<a href="https://arxiv.org/pdf/2110.08678">pdf</a>, <a href="https://arxiv.org/format/2110.08678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Improving Transformers with Probabilistic Attention Keys </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tam Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Le%2C+D+D">Dung D. Le</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+D+K">Duy Khuong Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+V">Viet-Anh Tran</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+N">Nhat Ho</a>, <a href="/search/cs?searchtype=author&query=Osher%2C+S+J">Stanley J. Osher</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.08678v2-abstract-short" style="display: inline;"> Multi-head attention is a driving force behind state-of-the-art transformers, which achieve remarkable performance across a variety of natural language processing (NLP) and computer vision tasks. It has been observed that for many applications, those attention heads learn redundant embedding, and most of them can be removed without degrading the performance of the model. Inspired by this observati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.08678v2-abstract-full').style.display = 'inline'; document.getElementById('2110.08678v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.08678v2-abstract-full" style="display: none;"> Multi-head attention is a driving force behind state-of-the-art transformers, which achieve remarkable performance across a variety of natural language processing (NLP) and computer vision tasks. It has been observed that for many applications, those attention heads learn redundant embedding, and most of them can be removed without degrading the performance of the model. Inspired by this observation, we propose Transformer with a Mixture of Gaussian Keys (Transformer-MGK), a novel transformer architecture that replaces redundant heads in transformers with a mixture of keys at each head. These mixtures of keys follow a Gaussian mixture model and allow each attention head to focus on different parts of the input sequence efficiently. Compared to its conventional transformer counterpart, Transformer-MGK accelerates training and inference, has fewer parameters, and requires fewer FLOPs to compute while achieving comparable or better accuracy across tasks. Transformer-MGK can also be easily extended to use with linear attention. We empirically demonstrate the advantage of Transformer-MGK in a range of practical applications, including language modeling and tasks that involve very long sequences. On the Wikitext-103 and Long Range Arena benchmark, Transformer-MGKs with 4 heads attain comparable or better performance to the baseline transformers with 8 heads. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.08678v2-abstract-full').style.display = 'none'; document.getElementById('2110.08678v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 16 figures, 10 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 39th International Conference on Machine Learning, Baltimore, Maryland, USA, PMLR 162, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.04840">arXiv:2110.04840</a> <span> [<a href="https://arxiv.org/pdf/2110.04840">pdf</a>, <a href="https://arxiv.org/format/2110.04840">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Heavy Ball Neural Ordinary Differential Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+H">Hedi Xia</a>, <a href="/search/cs?searchtype=author&query=Suliafu%2C+V">Vai Suliafu</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+H">Hangjie Ji</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Bertozzi%2C+A+L">Andrea L. Bertozzi</a>, <a href="/search/cs?searchtype=author&query=Osher%2C+S+J">Stanley J. Osher</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.04840v1-abstract-short" style="display: inline;"> We propose heavy ball neural ordinary differential equations (HBNODEs), leveraging the continuous limit of the classical momentum accelerated gradient descent, to improve neural ODEs (NODEs) training and inference. HBNODEs have two properties that imply practical advantages over NODEs: (i) The adjoint state of an HBNODE also satisfies an HBNODE, accelerating both forward and backward ODE solvers,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.04840v1-abstract-full').style.display = 'inline'; document.getElementById('2110.04840v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.04840v1-abstract-full" style="display: none;"> We propose heavy ball neural ordinary differential equations (HBNODEs), leveraging the continuous limit of the classical momentum accelerated gradient descent, to improve neural ODEs (NODEs) training and inference. HBNODEs have two properties that imply practical advantages over NODEs: (i) The adjoint state of an HBNODE also satisfies an HBNODE, accelerating both forward and backward ODE solvers, thus significantly reducing the number of function evaluations (NFEs) and improving the utility of the trained models. (ii) The spectrum of HBNODEs is well structured, enabling effective learning of long-term dependencies from complex sequential data. We verify the advantages of HBNODEs over NODEs on benchmark tasks, including image classification, learning complex dynamics, and sequential modeling. Our method requires remarkably fewer forward and backward NFEs, is more accurate, and learns long-term dependencies more effectively than the other ODE-based neural network models. Code is available at \url{https://github.com/hedixia/HeavyBallNODE}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.04840v1-abstract-full').style.display = 'none'; document.getElementById('2110.04840v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 9 figures, Accepted for publication at Advances in Neural Information Processing Systems (NeurIPS) 2021</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.12777">arXiv:2109.12777</a> <span> [<a href="https://arxiv.org/pdf/2109.12777">pdf</a>, <a href="https://arxiv.org/format/2109.12777">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ReINTEL Challenge 2020: A Comparative Study of Hybrid Deep Neural Network for Reliable Intelligence Identification on Vietnamese SNSs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Trinh%2C+H+V">Hoang Viet Trinh</a>, <a href="/search/cs?searchtype=author&query=Bui%2C+T+T">Tung Tien Bui</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tam Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Dao%2C+H+Q">Huy Quang Dao</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+Q+H">Quang Huu Pham</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+N+N">Ngoc N. Tran</a>, <a href="/search/cs?searchtype=author&query=Thanh%2C+T+M">Ta Minh Thanh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.12777v1-abstract-short" style="display: inline;"> The overwhelming abundance of data has created a misinformation crisis. Unverified sensationalism that is designed to grab the readers' short attention span, when crafted with malice, has caused irreparable damage to our society's structure. As a result, determining the reliability of an article has become a crucial task. After various ablation studies, we propose a multi-input model that can effe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.12777v1-abstract-full').style.display = 'inline'; document.getElementById('2109.12777v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.12777v1-abstract-full" style="display: none;"> The overwhelming abundance of data has created a misinformation crisis. Unverified sensationalism that is designed to grab the readers' short attention span, when crafted with malice, has caused irreparable damage to our society's structure. As a result, determining the reliability of an article has become a crucial task. After various ablation studies, we propose a multi-input model that can effectively leverage both tabular metadata and post content for the task. Applying state-of-the-art finetuning techniques for the pretrained component and training strategies for our complete model, we have achieved a 0.9462 ROC-score on the VLSP private test set. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.12777v1-abstract-full').style.display = 'none'; document.getElementById('2109.12777v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 7th International Workshop on Vietnamese Language and Speech Processing (VLSP), Hanoi, Vietnam, 2020, pp. 6-12 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.02347">arXiv:2108.02347</a> <span> [<a href="https://arxiv.org/pdf/2108.02347">pdf</a>, <a href="https://arxiv.org/format/2108.02347">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> FMMformer: Efficient and Flexible Transformer via Decomposed Near-field and Far-field Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Suliafu%2C+V">Vai Suliafu</a>, <a href="/search/cs?searchtype=author&query=Osher%2C+S+J">Stanley J. Osher</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Long Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.02347v1-abstract-short" style="display: inline;"> We propose FMMformers, a class of efficient and flexible transformers inspired by the celebrated fast multipole method (FMM) for accelerating interacting particle simulation. FMM decomposes particle-particle interaction into near-field and far-field components and then performs direct and coarse-grained computation, respectively. Similarly, FMMformers decompose the attention into near-field and fa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.02347v1-abstract-full').style.display = 'inline'; document.getElementById('2108.02347v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.02347v1-abstract-full" style="display: none;"> We propose FMMformers, a class of efficient and flexible transformers inspired by the celebrated fast multipole method (FMM) for accelerating interacting particle simulation. FMM decomposes particle-particle interaction into near-field and far-field components and then performs direct and coarse-grained computation, respectively. Similarly, FMMformers decompose the attention into near-field and far-field attention, modeling the near-field attention by a banded matrix and the far-field attention by a low-rank matrix. Computing the attention matrix for FMMformers requires linear complexity in computational time and memory footprint with respect to the sequence length. In contrast, standard transformers suffer from quadratic complexity. We analyze and validate the advantage of FMMformers over the standard transformer on the Long Range Arena and language modeling benchmarks. FMMformers can even outperform the standard transformer in terms of accuracy by a significant margin. For instance, FMMformers achieve an average classification accuracy of $60.74\%$ over the five Long Range Arena tasks, which is significantly better than the standard transformer's average accuracy of $58.70\%$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.02347v1-abstract-full').style.display = 'none'; document.getElementById('2108.02347v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.12255">arXiv:2104.12255</a> <span> [<a href="https://arxiv.org/pdf/2104.12255">pdf</a>, <a href="https://arxiv.org/format/2104.12255">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> 0 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q+T+M">Quan Thoi Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.12255v1-abstract-short" style="display: inline;"> What is the funniest number in cryptography? 0. The reason is that for all x, x*0 = 0, i.e., the equation is always satisfied no matter what x is. This article discusses crypto bugs in four BLS signatures' libraries (ethereum/py ecc, supranational/blst, herumi/bls, sigp/milagro bls) that revolve around 0. Furthermore, we develop "splitting zero" attacks to show a weakness in the proof-of-possessio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.12255v1-abstract-full').style.display = 'inline'; document.getElementById('2104.12255v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.12255v1-abstract-full" style="display: none;"> What is the funniest number in cryptography? 0. The reason is that for all x, x*0 = 0, i.e., the equation is always satisfied no matter what x is. This article discusses crypto bugs in four BLS signatures' libraries (ethereum/py ecc, supranational/blst, herumi/bls, sigp/milagro bls) that revolve around 0. Furthermore, we develop "splitting zero" attacks to show a weakness in the proof-of-possession aggregate signature scheme standardized in BLS RFC draft v4. Eth2 bug bounties program generously awarded $35,000 in total for the reported bugs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.12255v1-abstract-full').style.display = 'none'; document.getElementById('2104.12255v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.12983">arXiv:2103.12983</a> <span> [<a href="https://arxiv.org/pdf/2103.12983">pdf</a>, <a href="https://arxiv.org/format/2103.12983">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Counterfactual Explanation with Multi-Agent Reinforcement Learning for Drug Target Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tri Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Quinn%2C+T+P">Thomas P Quinn</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Thin Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Truyen Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.12983v2-abstract-short" style="display: inline;"> Motivation: Many high-performance DTA models have been proposed, but they are mostly black-box and thus lack human interpretability. Explainable AI (XAI) can make DTA models more trustworthy, and can also enable scientists to distill biological knowledge from the models. Counterfactual explanation is one popular approach to explaining the behaviour of a deep neural network, which works by systemat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.12983v2-abstract-full').style.display = 'inline'; document.getElementById('2103.12983v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.12983v2-abstract-full" style="display: none;"> Motivation: Many high-performance DTA models have been proposed, but they are mostly black-box and thus lack human interpretability. Explainable AI (XAI) can make DTA models more trustworthy, and can also enable scientists to distill biological knowledge from the models. Counterfactual explanation is one popular approach to explaining the behaviour of a deep neural network, which works by systematically answering the question "How would the model output change if the inputs were changed in this way?". Most counterfactual explanation methods only operate on single input data. It remains an open problem how to extend counterfactual-based XAI methods to DTA models, which have two inputs, one for drug and one for target, that also happen to be discrete in nature. Methods: We propose a multi-agent reinforcement learning framework, Multi-Agent Counterfactual Drug target binding Affinity (MACDA), to generate counterfactual explanations for the drug-protein complex. Our proposed framework provides human-interpretable counterfactual instances while optimizing both the input drug and target for counterfactual generation at the same time. Results: We benchmark the proposed MACDA framework using the Davis dataset and find that our framework produces more parsimonious explanations with no loss in explanation validity, as measured by encoding similarity and QED. We then present a case study involving ABL1 and Nilotinib to demonstrate how MACDA can explain the behaviour of a DTA model in the underlying substructure interaction between inputs in its prediction, revealing mechanisms that align with prior domain knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.12983v2-abstract-full').style.display = 'none'; document.getElementById('2103.12983v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.12139">arXiv:2102.12139</a> <span> [<a href="https://arxiv.org/pdf/2102.12139">pdf</a>, <a href="https://arxiv.org/format/2102.12139">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ACOMP50827.2020.00015">10.1109/ACOMP50827.2020.00015 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Interpreting the Latent Space of Generative Adversarial Networks using Supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Van%2C+T+P">Toan Pham Van</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tam Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+N+N">Ngoc N. Tran</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+H+V">Hoai Viet Nguyen</a>, <a href="/search/cs?searchtype=author&query=Doan%2C+L+B">Linh Bao Doan</a>, <a href="/search/cs?searchtype=author&query=Dao%2C+H+Q">Huy Quang Dao</a>, <a href="/search/cs?searchtype=author&query=Minh%2C+T+T">Thanh Ta Minh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.12139v1-abstract-short" style="display: inline;"> With great progress in the development of Generative Adversarial Networks (GANs), in recent years, the quest for insights in understanding and manipulating the latent space of GAN has gained more and more attention due to its wide range of applications. While most of the researches on this task have focused on unsupervised learning method, which induces difficulties in training and limitation in r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.12139v1-abstract-full').style.display = 'inline'; document.getElementById('2102.12139v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.12139v1-abstract-full" style="display: none;"> With great progress in the development of Generative Adversarial Networks (GANs), in recent years, the quest for insights in understanding and manipulating the latent space of GAN has gained more and more attention due to its wide range of applications. While most of the researches on this task have focused on unsupervised learning method, which induces difficulties in training and limitation in results, our work approaches another direction, encoding human's prior knowledge to discover more about the hidden space of GAN. With this supervised manner, we produce promising results, demonstrated by accurate manipulation of generated images. Even though our model is more suitable for task-specific problems, we hope that its ease in implementation, preciseness, robustness, and the allowance of richer set of properties (compared to other approaches) for image manipulation can enhance the result of many current applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.12139v1-abstract-full').style.display = 'none'; document.getElementById('2102.12139v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in 2020 International Conference on Advanced Computing and Applications (ACOMP)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2020 International Conference on Advanced Computing and Applications (ACOMP), Quy Nhon, Vietnam, 2020, pp. 49-54 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.12146">arXiv:2009.12146</a> <span> [<a href="https://arxiv.org/pdf/2009.12146">pdf</a>, <a href="https://arxiv.org/format/2009.12146">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> GEFA: Early Fusion Approach in Drug-Target Affinity Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tri Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Thin Nguyen</a>, <a href="/search/cs?searchtype=author&query=Le%2C+T+M">Thao Minh Le</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+T">Truyen Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.12146v2-abstract-short" style="display: inline;"> Predicting the interaction between a compound and a target is crucial for rapid drug repurposing. Deep learning has been successfully applied in drug-target affinity (DTA) problem. However, previous deep learning-based methods ignore modeling the direct interactions between drug and protein residues. This would lead to inaccurate learning of target representation which may change due to the drug b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.12146v2-abstract-full').style.display = 'inline'; document.getElementById('2009.12146v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.12146v2-abstract-full" style="display: none;"> Predicting the interaction between a compound and a target is crucial for rapid drug repurposing. Deep learning has been successfully applied in drug-target affinity (DTA) problem. However, previous deep learning-based methods ignore modeling the direct interactions between drug and protein residues. This would lead to inaccurate learning of target representation which may change due to the drug binding effects. In addition, previous DTA methods learn protein representation solely based on a small number of protein sequences in DTA datasets while neglecting the use of proteins outside of the DTA datasets. We propose GEFA (Graph Early Fusion Affinity), a novel graph-in-graph neural network with attention mechanism to address the changes in target representation because of the binding effects. Specifically, a drug is modeled as a graph of atoms, which then serves as a node in a larger graph of residues-drug complex. The resulting model is an expressive deep nested graph neural network. We also use pre-trained protein representation powered by the recent effort of learning contextualized protein representation. The experiments are conducted under different settings to evaluate scenarios such as novel drugs or targets. The results demonstrate the effectiveness of the pre-trained protein embedding and the advantages our GEFA in modeling the nested graph for drug-target interaction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.12146v2-abstract-full').style.display = 'none'; document.getElementById('2009.12146v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.06493">arXiv:2007.06493</a> <span> [<a href="https://arxiv.org/pdf/2007.06493">pdf</a>, <a href="https://arxiv.org/ps/2007.06493">ps</a>, <a href="https://arxiv.org/format/2007.06493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> HSD Shared Task in VLSP Campaign 2019:Hate Speech Detection for Social Good </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vu%2C+X">Xuan-Son Vu</a>, <a href="/search/cs?searchtype=author&query=Vu%2C+T">Thanh Vu</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+M">Mai-Vu Tran</a>, <a href="/search/cs?searchtype=author&query=Le-Cong%2C+T">Thanh Le-Cong</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+H+T+M">Huyen T M. Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.06493v1-abstract-short" style="display: inline;"> The paper describes the organisation of the "HateSpeech Detection" (HSD) task at the VLSP workshop 2019 on detecting the fine-grained presence of hate speech in Vietnamese textual items (i.e., messages) extracted from Facebook, which is the most popular social network site (SNS) in Vietnam. The task is organised as a multi-class classification task and based on a large-scale dataset containing 25,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.06493v1-abstract-full').style.display = 'inline'; document.getElementById('2007.06493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.06493v1-abstract-full" style="display: none;"> The paper describes the organisation of the "HateSpeech Detection" (HSD) task at the VLSP workshop 2019 on detecting the fine-grained presence of hate speech in Vietnamese textual items (i.e., messages) extracted from Facebook, which is the most popular social network site (SNS) in Vietnam. The task is organised as a multi-class classification task and based on a large-scale dataset containing 25,431 Vietnamese textual items from Facebook. The task participants were challenged to build a classification model that is capable of classifying an item to one of 3 classes, i.e., "HATE", "OFFENSIVE" and "CLEAN". HSD attracted a large number of participants and was a popular task at VLSP 2019. In particular, there were 71 teams signed up for the task, 14 of them submitted results with 380 valid submissions from 20th September 2019 to 4th October 2019. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.06493v1-abstract-full').style.display = 'none'; document.getElementById('2007.06493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.06919">arXiv:2006.06919</a> <span> [<a href="https://arxiv.org/pdf/2006.06919">pdf</a>, <a href="https://arxiv.org/format/2006.06919">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> MomentumRNN: Integrating Momentum into Recurrent Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a>, <a href="/search/cs?searchtype=author&query=Bertozzi%2C+A+L">Andrea L. Bertozzi</a>, <a href="/search/cs?searchtype=author&query=Osher%2C+S+J">Stanley J. Osher</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.06919v2-abstract-short" style="display: inline;"> Designing deep neural networks is an art that often involves an expensive search over candidate architectures. To overcome this for recurrent neural nets (RNNs), we establish a connection between the hidden state dynamics in an RNN and gradient descent (GD). We then integrate momentum into this framework and propose a new family of RNNs, called {\em MomentumRNNs}. We theoretically prove and numeri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.06919v2-abstract-full').style.display = 'inline'; document.getElementById('2006.06919v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.06919v2-abstract-full" style="display: none;"> Designing deep neural networks is an art that often involves an expensive search over candidate architectures. To overcome this for recurrent neural nets (RNNs), we establish a connection between the hidden state dynamics in an RNN and gradient descent (GD). We then integrate momentum into this framework and propose a new family of RNNs, called {\em MomentumRNNs}. We theoretically prove and numerically demonstrate that MomentumRNNs alleviate the vanishing gradient issue in training RNNs. We study the momentum long-short term memory (MomentumLSTM) and verify its advantages in convergence speed and accuracy over its LSTM counterpart across a variety of benchmarks. We also demonstrate that MomentumRNN is applicable to many types of recurrent cells, including those in the state-of-the-art orthogonal RNNs. Finally, we show that other advanced momentum-based optimization methods, such as Adam and Nesterov accelerated gradients with a restart, can be easily incorporated into the MomentumRNN framework for designing new recurrent cells with even better performance. The code is available at https://github.com/minhtannguyen/MomentumRNN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.06919v2-abstract-full').style.display = 'none'; document.getElementById('2006.06919v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 11 figures, Accepted for publication at Advances in Neural Information Processing Systems (NeurIPS) 2020</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Advances in Neural Information Processing Systems (NeurIPS) 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.01403">arXiv:2004.01403</a> <span> [<a href="https://arxiv.org/pdf/2004.01403">pdf</a>, <a href="https://arxiv.org/ps/2004.01403">ps</a>, <a href="https://arxiv.org/format/2004.01403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A "Final" Security Bug </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q+T+M">Quan Thoi Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.01403v1-abstract-short" style="display: inline;"> This article discusses a fixed critical security bug in Google Tink's Ed25519 Java implementation. The bug allows remote attackers to extract the private key with only two Ed25519 signatures. The vulnerability comes from the misunderstanding of what "final" in Java programming language means. The bug was discovered during security review before Google Tink was officially released. It reinforces th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.01403v1-abstract-full').style.display = 'inline'; document.getElementById('2004.01403v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.01403v1-abstract-full" style="display: none;"> This article discusses a fixed critical security bug in Google Tink's Ed25519 Java implementation. The bug allows remote attackers to extract the private key with only two Ed25519 signatures. The vulnerability comes from the misunderstanding of what "final" in Java programming language means. The bug was discovered during security review before Google Tink was officially released. It reinforces the challenge in writing safe cryptographic code and the importance of the security review process even for the code written by professional cryptographers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.01403v1-abstract-full').style.display = 'none'; document.getElementById('2004.01403v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.09019">arXiv:2003.09019</a> <span> [<a href="https://arxiv.org/pdf/2003.09019">pdf</a>, <a href="https://arxiv.org/format/2003.09019">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Intuitive Understanding of Quantum Computation and Post-Quantum Cryptography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q+T+M">Quan Thoi Minh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.09019v3-abstract-short" style="display: inline;"> Post-quantum cryptography is inevitable. National Institute of Standards and Technology (NIST) starts standardizing quantum-resistant public-key cryptography (aka post-quantum cryptography). The reason is that investment in quantum computing is blooming which poses significant threats to our currently deployed cryptographic algorithms. As a security engineer, to prepare for the apocalypse in advan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.09019v3-abstract-full').style.display = 'inline'; document.getElementById('2003.09019v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.09019v3-abstract-full" style="display: none;"> Post-quantum cryptography is inevitable. National Institute of Standards and Technology (NIST) starts standardizing quantum-resistant public-key cryptography (aka post-quantum cryptography). The reason is that investment in quantum computing is blooming which poses significant threats to our currently deployed cryptographic algorithms. As a security engineer, to prepare for the apocalypse in advance, I've been watching the development of quantum computers and post-quantum cryptography closely. Never mind, I simply made up an excuse to study these fascinating scientific fields. However, they are extremely hard to understand, at least to an amateur like me. This article shares with you my notes with the hope that you will have an intuitive understanding of the beautiful and mind-blowing quantum algorithms and post-quantum cryptography. Update: Multivariate signature scheme Rainbow is broken by Ward Beullens. Supersingular Isogeny Diffie-Hellman protocol (SIDH) is broken by Wouter Castryck and Thomas Decru <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.09019v3-abstract-full').style.display = 'none'; document.getElementById('2003.09019v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Update: Multivariate signature scheme Rainbow is broken by Ward Beullens. Supersingular Isogeny Diffie-Hellman protocol (SIDH) is broken by Wouter Castryck and Thomas Decru</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.10583">arXiv:2002.10583</a> <span> [<a href="https://arxiv.org/pdf/2002.10583">pdf</a>, <a href="https://arxiv.org/format/2002.10583">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Scheduled Restart Momentum for Accelerated Stochastic Gradient Descent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bao Wang</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Bertozzi%2C+A+L">Andrea L. Bertozzi</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a>, <a href="/search/cs?searchtype=author&query=Osher%2C+S+J">Stanley J. Osher</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.10583v2-abstract-short" style="display: inline;"> Stochastic gradient descent (SGD) with constant momentum and its variants such as Adam are the optimization algorithms of choice for training deep neural networks (DNNs). Since DNN training is incredibly computationally expensive, there is great interest in speeding up the convergence. Nesterov accelerated gradient (NAG) improves the convergence rate of gradient descent (GD) for convex optimizatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.10583v2-abstract-full').style.display = 'inline'; document.getElementById('2002.10583v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.10583v2-abstract-full" style="display: none;"> Stochastic gradient descent (SGD) with constant momentum and its variants such as Adam are the optimization algorithms of choice for training deep neural networks (DNNs). Since DNN training is incredibly computationally expensive, there is great interest in speeding up the convergence. Nesterov accelerated gradient (NAG) improves the convergence rate of gradient descent (GD) for convex optimization using a specially designed momentum; however, it accumulates error when an inexact gradient is used (such as in SGD), slowing convergence at best and diverging at worst. In this paper, we propose Scheduled Restart SGD (SRSGD), a new NAG-style scheme for training DNNs. SRSGD replaces the constant momentum in SGD by the increasing momentum in NAG but stabilizes the iterations by resetting the momentum to zero according to a schedule. Using a variety of models and benchmarks for image classification, we demonstrate that, in training DNNs, SRSGD significantly improves convergence and generalization; for instance in training ResNet200 for ImageNet classification, SRSGD achieves an error rate of 20.93% vs. the benchmark of 22.13%. These improvements become more significant as the network grows deeper. Furthermore, on both CIFAR and ImageNet, SRSGD reaches similar or even better error rates with significantly fewer training epochs compared to the SGD baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.10583v2-abstract-full').style.display = 'none'; document.getElementById('2002.10583v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages, 16 figures, 18 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.03978">arXiv:1912.03978</a> <span> [<a href="https://arxiv.org/pdf/1912.03978">pdf</a>, <a href="https://arxiv.org/format/1912.03978">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> InfoCNF: An Efficient Conditional Continuous Normalizing Flow with Adaptive Solvers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Tan M. Nguyen</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+A">Animesh Garg</a>, <a href="/search/cs?searchtype=author&query=Baraniuk%2C+R+G">Richard G. Baraniuk</a>, <a href="/search/cs?searchtype=author&query=Anandkumar%2C+A">Anima Anandkumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.03978v1-abstract-short" style="display: inline;"> Continuous Normalizing Flows (CNFs) have emerged as promising deep generative models for a wide range of tasks thanks to their invertibility and exact likelihood estimation. However, conditioning CNFs on signals of interest for conditional image generation and downstream predictive tasks is inefficient due to the high-dimensional latent code generated by the model, which needs to be of the same si… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.03978v1-abstract-full').style.display = 'inline'; document.getElementById('1912.03978v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.03978v1-abstract-full" style="display: none;"> Continuous Normalizing Flows (CNFs) have emerged as promising deep generative models for a wide range of tasks thanks to their invertibility and exact likelihood estimation. However, conditioning CNFs on signals of interest for conditional image generation and downstream predictive tasks is inefficient due to the high-dimensional latent code generated by the model, which needs to be of the same size as the input data. In this paper, we propose InfoCNF, an efficient conditional CNF that partitions the latent space into a class-specific supervised code and an unsupervised code that shared among all classes for efficient use of labeled information. Since the partitioning strategy (slightly) increases the number of function evaluations (NFEs), InfoCNF also employs gating networks to learn the error tolerances of its ordinary differential equation (ODE) solvers for better speed and performance. We show empirically that InfoCNF improves the test accuracy over the baseline while yielding comparable likelihood scores and reducing the NFEs on CIFAR10. Furthermore, applying the same partitioning strategy in InfoCNF on time-series data helps improve extrapolation performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.03978v1-abstract-full').style.display = 'none'; document.getElementById('1912.03978v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 14 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1812.00195">arXiv:1812.00195</a> <span> [<a href="https://arxiv.org/pdf/1812.00195">pdf</a>, <a href="https://arxiv.org/ps/1812.00195">ps</a>, <a href="https://arxiv.org/format/1812.00195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> One for All: Neural Joint Modeling of Entities and Events </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M">Trung Minh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+H">Thien Huu Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1812.00195v1-abstract-short" style="display: inline;"> The previous work for event extraction has mainly focused on the predictions for event triggers and argument roles, treating entity mentions as being provided by human annotators. This is unrealistic as entity mentions are usually predicted by some existing toolkits whose errors might be propagated to the event trigger and argument role recognition. Few of the recent work has addressed this proble… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.00195v1-abstract-full').style.display = 'inline'; document.getElementById('1812.00195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1812.00195v1-abstract-full" style="display: none;"> The previous work for event extraction has mainly focused on the predictions for event triggers and argument roles, treating entity mentions as being provided by human annotators. This is unrealistic as entity mentions are usually predicted by some existing toolkits whose errors might be propagated to the event trigger and argument role recognition. Few of the recent work has addressed this problem by jointly predicting entity mentions, event triggers and arguments. However, such work is limited to using discrete engineering features to represent contextual information for the individual tasks and their interactions. In this work, we propose a novel model to jointly perform predictions for entity mentions, event triggers and arguments based on the shared hidden representations from deep learning. The experiments demonstrate the benefits of the proposed method, leading to the state-of-the-art performance for event extraction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.00195v1-abstract-full').style.display = 'none'; document.getElementById('1812.00195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at The Thirty-Third AAAI Conference on Artificial Intelligence (AAAI-19) (Honolulu, Hawaii, USA)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1711.10124">arXiv:1711.10124</a> <span> [<a href="https://arxiv.org/pdf/1711.10124">pdf</a>, <a href="https://arxiv.org/ps/1711.10124">ps</a>, <a href="https://arxiv.org/format/1711.10124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Vietnamese Semantic Role Labelling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Le-Hong%2C+P">Phuong Le-Hong</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+T+H">Thai Hoang Pham</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+X+K">Xuan Khoai Pham</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M+H">Thi Minh Huyen Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+L">Thi Luong Nguyen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+M+H">Minh Hiep Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1711.10124v1-abstract-short" style="display: inline;"> In this paper, we study semantic role labelling (SRL), a subtask of semantic parsing of natural language sentences and its application for the Vietnamese language. We present our effort in building Vietnamese PropBank, the first Vietnamese SRL corpus and a software system for labelling semantic roles of Vietnamese texts. In particular, we present a novel constituent extraction algorithm in the arg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1711.10124v1-abstract-full').style.display = 'inline'; document.getElementById('1711.10124v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1711.10124v1-abstract-full" style="display: none;"> In this paper, we study semantic role labelling (SRL), a subtask of semantic parsing of natural language sentences and its application for the Vietnamese language. We present our effort in building Vietnamese PropBank, the first Vietnamese SRL corpus and a software system for labelling semantic roles of Vietnamese texts. In particular, we present a novel constituent extraction algorithm in the argument candidate identification step which is more suitable and more accurate than the common node-mapping method. In the machine learning part, our system integrates distributed word features produced by two recent unsupervised learning models in two learned statistical classifiers and makes use of integer linear programming inference procedure to improve the accuracy. The system is evaluated in a series of experiments and achieves a good result, an $F_1$ score of 74.77%. Our system, including corpus and software, is available as an open source project for free research and we believe that it is a good baseline for the development of future Vietnamese SRL systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1711.10124v1-abstract-full').style.display = 'none'; document.getElementById('1711.10124v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the VNU Journal of Science</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1609.04604">arXiv:1609.04604</a> <span> [<a href="https://arxiv.org/pdf/1609.04604">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> WiFi-Direct Simulation for INET in OMNeT++ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Iskounen%2C+S">Syphax Iskounen</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+M+T">Thi Mai Trang Nguyen</a>, <a href="/search/cs?searchtype=author&query=Monnet%2C+S">Sebastien Monnet</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1609.04604v1-abstract-short" style="display: inline;"> Wi-Fi Direct is a popular wireless technology which is integrated in most of today's smartphones and tablets. This technology allows a set of devices to dynamically negotiate and select a group owner which plays the role access point. This important feature is the strength of Wi-Fi Direct and makes it more and more widely used in telecommunications networks. In this paper, we present the implement… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1609.04604v1-abstract-full').style.display = 'inline'; document.getElementById('1609.04604v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1609.04604v1-abstract-full" style="display: none;"> Wi-Fi Direct is a popular wireless technology which is integrated in most of today's smartphones and tablets. This technology allows a set of devices to dynamically negotiate and select a group owner which plays the role access point. This important feature is the strength of Wi-Fi Direct and makes it more and more widely used in telecommunications networks. In this paper, we present the implementation of Wi-Fi Direct in the INET framework of OMNeT++. We have implemented the main procedures of Wi-Fi Direct such as discovery, negotiation and group formation. The implementation has been validated by two test scenarios which show the conformity of the implementation to the protocol specification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1609.04604v1-abstract-full').style.display = 'none'; document.getElementById('1609.04604v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2016. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in: A. Foerster, V. Vesely, A. Virdis, M. Kirsche (Eds.), Proc. of the 3rd OMNeT++ Community Summit, Brno University of Technology - Czech Republic - September 15-16, 2016</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> OMNET/2016/06 </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository