<!-- CINXE.COM -->

<!-- Search | arXiv e-print repository -->

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->
  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />
  <!-- MathJax v2 configuration. ignoreClass '.*' combined with processClass 'mathjax.*'
       restricts TeX scanning to elements that carry a class starting with "mathjax". -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Loaded over explicit https (not a protocol-relative URL), matching every other static.arxiv.org asset on this page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" />
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" />
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this type selector
       presumably matches nothing. Confirm the intended target before changing it. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
  <header><a href="#main-container" class="is-sr-only">Skip to main content</a>
    <!-- contains Cornell logo and sponsor statement -->
    <div class="attribution level is-marginless" role="banner">
      <div class="level-left">
        <!-- No aria-label on the image: it would replace the alt text as the accessible name of this link. -->
        <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a>
      </div>
      <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
    </div>
    <!-- contains arXiv identity and search bar -->
    <div class="identity level is-marginless">
      <div class="level-left">
        <div class="level-item">
          <!-- The link carries the accessible name; the image keeps only its alt text
               (an aria-label on the image would override that alt). -->
          <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
            <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;"/>
          </a>
        </div>
      </div>
      <div class="search-block level-right">
        <!-- Compact site-wide search shown in the page header. -->
        <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
          <div class="field has-addons">
            <div class="control">
              <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" />
              <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
            </div>
            <div class="control">
              <div class="select is-small">
                <select name="searchtype" aria-label="Field to search">
                  <option value="all" selected="selected">All fields</option>
                  <option value="title">Title</option>
                  <option value="author">Author</option>
                  <option value="abstract">Abstract</option>
                  <option value="comments">Comments</option>
                  <option value="journal_ref">Journal reference</option>
                  <option value="acm_class">ACM classification</option>
                  <option value="msc_class">MSC classification</option>
                  <option value="report_num">Report number</option>
                  <option value="paper_id">arXiv identifier</option>
                  <option value="doi">DOI</option>
                  <option value="orcid">ORCID</option>
                  <option value="author_id">arXiv author ID</option>
                  <option value="help">Help pages</option>
                  <option value="full_text">Full text</option>
                </select>
              </div>
            </div>
            <input type="hidden" name="source" value="header">
            <button type="submit" class="button is-small is-cul-darker">Search</button>
          </div>
        </form>
      </div>
    </div> <!-- closes identity -->
    <div class="container">
      <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
        <a href="https://arxiv.org/login">Login</a>
      </div>
    </div>
  </header>
  <main class="container" id="main-container">
    <div class="level is-marginless">
      <div class="level-left">
        <h1 class="title is-clearfix"> Showing 1&ndash;24 of 24 results for author: <span class="mathjax">Lan, Y</span> </h1>
      </div>
      <div class="level-right is-hidden-mobile">
        <!-- feedback for mobile is moved to footer -->
        <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
      </div>
    </div>
    <div class="content">
      <!-- Main search form, scoped to the q-bio archive. role="search" (not the
           non-existent aria-role attribute) exposes it as a search landmark. -->
      <form method="GET" action="/search/q-bio" role="search">
        Searching in archive <strong>q-bio</strong>. <a href="/search/?searchtype=author&amp;query=Lan%2C+Y">Search in all archives.</a>
        <div class="field has-addons-tablet">
          <div class="control is-expanded">
            <label for="query" class="hidden-label">Search term or terms</label>
            <input class="input is-medium" id="query" name="query" placeholder="Search term..."
type="text" value="Lan, Y">
          </div>
          <div class="select control is-medium">
            <label class="is-hidden" for="searchtype">Field</label>
            <select class="is-medium" id="searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option selected value="author">Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
          </div>
          <div class="control">
            <button type="submit" class="button is-link is-medium">Search</button>
          </div>
        </div>
        <div class="field">
          <div class="control is-size-7">
            <label class="radio">
              <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts
            </label>
            <label class="radio">
              <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts
            </label>
          </div>
        </div>
        <div class="is-clearfix" style="height: 2.5em">
          <div class="is-pulled-right">
            <a href="/search/advanced?terms-0-term=Lan%2C+Y&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a>
          </div>
        </div>
        <!-- Carry the current sort order and page size along with a new query. -->
        <input type="hidden" name="order" value="-announced_date_first">
        <input type="hidden" name="size" value="50">
      </form>
      <div class="level breathe-horizontal">
        <div class="level-left">
          <!-- Page-size / sort-order form.
               NOTE(review): this form submits to /search/ while the search form above
               submits to /search/q-bio; confirm that dropping the archive scope here is intended. -->
          <form method="GET" action="/search/">
            <!-- Hidden mirror of the current query so it is resubmitted with the new
                 size/order. Field names match the main form; ids are prefixed with
                 "sort-" because ids must be unique within the document, and each label
                 must point at the control in this form. -->
            <div style="display: none;">
              <select id="sort-searchtype" name="searchtype">
                <option value="all">All fields</option>
                <option value="title">Title</option>
                <option selected value="author">Author(s)</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="license">License (URI)</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
              <input id="sort-query" name="query" type="text" value="Lan, Y">
              <ul id="sort-abstracts">
                <li><input checked id="sort-abstracts-0" name="abstracts" type="radio" value="show"> <label for="sort-abstracts-0">Show abstracts</label></li>
                <li><input id="sort-abstracts-1" name="abstracts" type="radio" value="hide"> <label for="sort-abstracts-1">Hide abstracts</label></li>
              </ul>
            </div>
            <div class="box field is-grouped is-grouped-multiline level-item">
              <div class="control">
                <span class="select is-small">
                  <select id="size" name="size">
                    <option value="25">25</option>
                    <option selected value="50">50</option>
                    <option value="100">100</option>
                    <option value="200">200</option>
                  </select>
                </span>
                <label for="size">results per page</label>.
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10516">arXiv:2410.10516</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10516">pdf</a>, <a href="https://arxiv.org/format/2410.10516">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> UniGEM: A Unified Approach to Generation and Property Prediction for Molecules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lu%2C+Y">Yan Lu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+Z">Zhi-Ming Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10516v1-abstract-short" style="display: inline;"> Molecular generation and molecular property prediction are both crucial for drug discovery, but they are often developed independently. Inspired by recent studies, which demonstrate that diffusion model, a prominent generative approach, can learn meaningful data representations that enhance predictive tasks, we explore the potential for developing a unified generative model in the molecular domain&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10516v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10516v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10516v1-abstract-full" style="display: none;"> Molecular generation and molecular property prediction are both crucial for drug discovery, but they are often developed independently. Inspired by recent studies, which demonstrate that diffusion model, a prominent generative approach, can learn meaningful data representations that enhance predictive tasks, we explore the potential for developing a unified generative model in the molecular domain that effectively addresses both molecular generation and property prediction tasks. However, the integration of these tasks is challenging due to inherent inconsistencies, making simple multi-task learning ineffective. To address this, we propose UniGEM, the first unified model to successfully integrate molecular generation and property prediction, delivering superior performance in both tasks. Our key innovation lies in a novel two-phase generative process, where predictive tasks are activated in the later stages, after the molecular scaffold is formed. We further enhance task balance through innovative training strategies. 
Rigorous theoretical analysis and comprehensive experiments demonstrate our significant improvements in both tasks. The principles behind UniGEM hold promise for broader applications, including natural language processing and computer vision. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10516v1-abstract-full').style.display = 'none'; document.getElementById('2410.10516v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08980">arXiv:2406.08980</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.08980">pdf</a>, <a href="https://arxiv.org/format/2406.08980">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> From Theory to Therapy: Reframing SBDD Model Evaluation via Practical Metrics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Tan%2C+H">Haichuan Tan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ren%2C+M">Minsi Ren</a>, <a 
href="/search/q-bio?searchtype=author&amp;query=Huang%2C+X">Xiao Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zhang%2C+Y">Ya-Qin Zhang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08980v1-abstract-short" style="display: inline;"> Recent advancements in structure-based drug design (SBDD) have significantly enhanced the efficiency and precision of drug discovery by generating molecules tailored to bind specific protein pockets. Despite these technological strides, their practical application in real-world drug development remains challenging due to the complexities of synthesizing and testing these molecules. The reliability&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08980v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08980v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08980v1-abstract-full" style="display: none;"> Recent advancements in structure-based drug design (SBDD) have significantly enhanced the efficiency and precision of drug discovery by generating molecules tailored to bind specific protein pockets. Despite these technological strides, their practical application in real-world drug development remains challenging due to the complexities of synthesizing and testing these molecules. The reliability of the Vina docking score, the current standard for assessing binding abilities, is increasingly questioned due to its susceptibility to overfitting. 
To address these limitations, we propose a comprehensive evaluation framework that includes assessing the similarity of generated molecules to known active compounds, introducing a virtual screening-based metric for practical deployment capabilities, and re-evaluating binding affinity more rigorously. Our experiments reveal that while current SBDD models achieve high Vina scores, they fall short in practical usability metrics, highlighting a significant gap between theoretical predictions and real-world applicability. Our proposed metrics and dataset aim to bridge this gap, enhancing the practical applicability of future SBDD models and aligning them more closely with the needs of pharmaceutical research and development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08980v1-abstract-full').style.display = 'none'; document.getElementById('2406.08980v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08961">arXiv:2406.08961</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.08961">pdf</a>, <a href="https://arxiv.org/format/2406.08961">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SIU: A Million-Scale Structural Small Molecule-Protein Interaction Dataset for Unbiased Bioactivity Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Jia%2C+Y">Yinjun Jia</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+H">Hongbo Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zhang%2C+Y">Ya-Qin Zhang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08961v1-abstract-short" style="display: inline;"> Small molecules play a pivotal role in modern medicine, and scrutinizing their interactions with protein targets is essential for the discovery and development of novel, life-saving therapeutics. The term &#34;bioactivity&#34; encompasses various biological effects resulting from these interactions, including both binding and functional responses. 
The magnitude of bioactivity dictates the therapeutic or t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08961v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08961v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08961v1-abstract-full" style="display: none;"> Small molecules play a pivotal role in modern medicine, and scrutinizing their interactions with protein targets is essential for the discovery and development of novel, life-saving therapeutics. The term &#34;bioactivity&#34; encompasses various biological effects resulting from these interactions, including both binding and functional responses. The magnitude of bioactivity dictates the therapeutic or toxic pharmacological outcomes of small molecules, rendering accurate bioactivity prediction crucial for the development of safe and effective drugs. However, existing structural datasets of small molecule-protein interactions are often limited in scale and lack systematically organized bioactivity labels, thereby impeding our understanding of these interactions and precise bioactivity prediction. In this study, we introduce a comprehensive dataset of small molecule-protein interactions, consisting of over a million binding structures, each annotated with real biological activity labels. This dataset is designed to facilitate unbiased bioactivity prediction. We evaluated several classical models on this dataset, and the results demonstrate that the task of unbiased bioactivity prediction is challenging yet essential. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08961v1-abstract-full').style.display = 'none'; document.getElementById('2406.08961v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17802">arXiv:2405.17802</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.17802">pdf</a>, <a href="https://arxiv.org/format/2405.17802">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Multi-level Interaction Modeling for Protein Mutational Effect Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Mo%2C+Y">Yuanle Mo</a>, <a href="/search/q-bio?searchtype=author&amp;query=Hong%2C+X">Xin Hong</a>, <a href="/search/q-bio?searchtype=author&amp;query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Jia%2C+Y">Yinjun Jia</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17802v1-abstract-short" style="display: inline;"> Protein-protein interactions are central mediators in many biological processes. 
Accurately predicting the effects of mutations on interactions is crucial for guiding the modulation of these interactions, thereby playing a significant role in therapeutic development and drug discovery. Mutations generally affect interactions hierarchically across three levels: mutated residues exhibit different si&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17802v1-abstract-full').style.display = 'inline'; document.getElementById('2405.17802v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17802v1-abstract-full" style="display: none;"> Protein-protein interactions are central mediators in many biological processes. Accurately predicting the effects of mutations on interactions is crucial for guiding the modulation of these interactions, thereby playing a significant role in therapeutic development and drug discovery. Mutations generally affect interactions hierarchically across three levels: mutated residues exhibit different sidechain conformations, which lead to changes in the backbone conformation, eventually affecting the binding affinity between proteins. However, existing methods typically focus only on sidechain-level interaction modeling, resulting in suboptimal predictions. In this work, we propose a self-supervised multi-level pre-training framework, ProMIM, to fully capture all three levels of interactions with well-designed pretraining objectives. Experiments show ProMIM outperforms all the baselines on the standard benchmark, especially on mutations where significant changes in backbone conformations may occur. In addition, leading results from zero-shot evaluations for SARS-CoV-2 mutational effect prediction and antibody optimization underscore the potential of ProMIM as a powerful next-generation tool for developing novel therapeutic approaches and new drugs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17802v1-abstract-full').style.display = 'none'; document.getElementById('2405.17802v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10343">arXiv:2405.10343</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.10343">pdf</a>, <a href="https://arxiv.org/format/2405.10343">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> UniCorn: A Unified Contrastive Learning Approach for Multi-view Molecular Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Li%2C+M">Minghao Li</a>, <a href="/search/q-bio?searchtype=author&amp;query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+Z">Zhi-Ming Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2405.10343v1-abstract-short" style="display: inline;"> Recently, a noticeable trend has emerged in developing pre-trained foundation models in the domains of CV and NLP. However, for molecular pre-training, there lacks a universal model capable of effectively applying to various categories of molecular tasks, since existing prevalent pre-training methods exhibit effectiveness for specific types of downstream tasks. Furthermore, the lack of profound un&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10343v1-abstract-full').style.display = 'inline'; document.getElementById('2405.10343v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10343v1-abstract-full" style="display: none;"> Recently, a noticeable trend has emerged in developing pre-trained foundation models in the domains of CV and NLP. However, for molecular pre-training, there lacks a universal model capable of effectively applying to various categories of molecular tasks, since existing prevalent pre-training methods exhibit effectiveness for specific types of downstream tasks. Furthermore, the lack of profound understanding of existing pre-training methods, including 2D graph masking, 2D-3D contrastive learning, and 3D denoising, hampers the advancement of molecular foundation models. In this work, we provide a unified comprehension of existing pre-training methods through the lens of contrastive learning. Thus their distinctions lie in clustering different views of molecules, which is shown beneficial to specific downstream tasks. To achieve a complete and general-purpose molecular representation, we propose a novel pre-training framework, named UniCorn, that inherits the merits of the three methods, depicting molecular views in three different levels. 
SOTA performance across quantum, physicochemical, and biological tasks, along with comprehensive ablation study, validate the universality and effectiveness of UniCorn. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10343v1-abstract-full').style.display = 'none'; document.getElementById('2405.10343v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.14736">arXiv:2403.14736</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.14736">pdf</a>, <a href="https://arxiv.org/format/2403.14736">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NaNa and MiGu: Semantic Data Augmentation Techniques to Enhance Protein Classification in Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yi-Shan Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Chen%2C+P">Pin-Yu Chen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ho%2C+T">Tsung-Yi Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.14736v2-abstract-short" style="display: inline;"> Protein classification tasks 
are essential in drug discovery. Real-world protein structures are dynamic, which will determine the properties of proteins. However, the existing machine learning methods, like ProNet (Wang et al., 2022a), only access limited conformational characteristics and protein side-chain features, leading to impractical protein structure and inaccuracy of protein classes in th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14736v2-abstract-full').style.display = 'inline'; document.getElementById('2403.14736v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14736v2-abstract-full" style="display: none;"> Protein classification tasks are essential in drug discovery. Real-world protein structures are dynamic, which will determine the properties of proteins. However, the existing machine learning methods, like ProNet (Wang et al., 2022a), only access limited conformational characteristics and protein side-chain features, leading to impractical protein structure and inaccuracy of protein classes in their predictions. In this paper, we propose novel semantic data augmentation methods, Novel Augmentation of New Node Attributes (NaNa), and Molecular Interactions and Geometric Upgrading (MiGu) to incorporate backbone chemical and side-chain biophysical information into protein classification tasks and a co-embedding residual learning framework. Specifically, we leverage molecular biophysical, secondary structure, chemical bonds, and ionic features of proteins to facilitate protein classification tasks. Furthermore, our semantic augmentation methods and the co-embedding residual learning framework can improve the performance of GIN (Xu et al., 2019) on EC and Fold datasets (Bairoch, 2000; Andreeva et al., 2007) by 16.41% and 11.33% respectively. Our code is available at https://github.com/r08b46009/Code_for_MIGU_NANA/tree/main. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14736v2-abstract-full').style.display = 'none'; document.getElementById('2403.14736v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12987">arXiv:2403.12987</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.12987">pdf</a>, <a href="https://arxiv.org/format/2403.12987">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Specificity in SBDD: Leveraging Delta Score and Energy-Guided Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ren%2C+M">Minsi Ren</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Qiang%2C+B">Bo Qiang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+Z">Zhi-Ming Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12987v1-abstract-short" style="display: inline;"> In the field of Structure-based Drug Design (SBDD), deep learning-based generative models have achieved outstanding performance in terms of docking score. However, further study shows that the existing molecular generative methods and docking scores both have lacked consideration in terms of specificity, which means that generated molecules bind to almost every protein pocket with high affinity. T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12987v1-abstract-full').style.display = 'inline'; document.getElementById('2403.12987v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12987v1-abstract-full" style="display: none;"> In the field of Structure-based Drug Design (SBDD), deep learning-based generative models have achieved outstanding performance in terms of docking score. However, further study shows that the existing molecular generative methods and docking scores both have lacked consideration in terms of specificity, which means that generated molecules bind to almost every protein pocket with high affinity. To address this, we introduce the Delta Score, a new metric for evaluating the specificity of molecular binding. To further incorporate this insight for generation, we develop an innovative energy-guided approach using contrastive learning, with active compounds as decoys, to direct generative models toward creating molecules with high specificity. Our empirical results show that this method not only enhances the delta score but also maintains or improves traditional docking scores, successfully bridging the gap between SBDD and real-world needs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12987v1-abstract-full').style.display = 'none'; document.getElementById('2403.12987v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13779">arXiv:2402.13779</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.13779">pdf</a>, <a href="https://arxiv.org/format/2402.13779">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Contextual Molecule Representation Learning from Chemical Reaction Knowledge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Tang%2C+H">Han Tang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lin%2C+B">Bicheng Lin</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Liu%2C+J">JIngjing Liu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2402.13779v1-abstract-short" style="display: inline;"> In recent years, self-supervised learning has emerged as a powerful tool to harness abundant unlabelled data for representation learning and has been broadly adopted in diverse areas. However, when applied to molecular representation learning (MRL), prevailing techniques such as masked sub-unit reconstruction often fall short, due to the high degree of freedom in the possible combinations of atoms&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13779v1-abstract-full').style.display = 'inline'; document.getElementById('2402.13779v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13779v1-abstract-full" style="display: none;"> In recent years, self-supervised learning has emerged as a powerful tool to harness abundant unlabelled data for representation learning and has been broadly adopted in diverse areas. However, when applied to molecular representation learning (MRL), prevailing techniques such as masked sub-unit reconstruction often fall short, due to the high degree of freedom in the possible combinations of atoms within molecules, which brings insurmountable complexity to the masking-reconstruction paradigm. To tackle this challenge, we introduce REMO, a self-supervised learning framework that takes advantage of well-defined atom-combination rules in common chemistry. Specifically, REMO pre-trains graph/Transformer encoders on 1.7 million known chemical reactions in the literature. We propose two pre-training objectives: Masked Reaction Centre Reconstruction (MRCR) and Reaction Centre Identification (RCI). REMO offers a novel solution to MRL by exploiting the underlying shared patterns in chemical reactions as <em>context</em> for pre-training, which effectively infers meaningful representations of common chemistry knowledge. 
Such contextual representations can then be utilized to support diverse downstream molecular tasks with minimum finetuning, such as affinity prediction and drug-drug interaction prediction. Extensive experimental results on MoleculeACE, ACNet, drug-drug interaction (DDI), and reaction type classification show that across all tested downstream tasks, REMO outperforms the standard baseline of single-molecule masked modeling used in current MRL. Remarkably, REMO is the pioneering deep learning model surpassing fingerprint-based methods in activity cliff benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13779v1-abstract-full').style.display = 'none'; document.getElementById('2402.13779v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint. 
Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16160">arXiv:2311.16160</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.16160">pdf</a>, <a href="https://arxiv.org/format/2311.16160">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Protein-ligand binding representation learning from fine-grained interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Li%2C+M">Minghao Li</a>, <a href="/search/q-bio?searchtype=author&amp;query=Jia%2C+Y">Yinjun Jia</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Weiying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16160v1-abstract-short" style="display: inline;"> The binding between proteins and ligands plays a crucial role in the realm of drug discovery. Previous deep learning approaches have shown promising results over traditional computationally intensive methods, but resulting in poor generalization due to limited supervised data. In this paper, we propose to learn protein-ligand binding representation in a self-supervised learning manner. 
Different f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16160v1-abstract-full').style.display = 'inline'; document.getElementById('2311.16160v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16160v1-abstract-full" style="display: none;"> The binding between proteins and ligands plays a crucial role in the realm of drug discovery. Previous deep learning approaches have shown promising results over traditional computationally intensive methods, but resulting in poor generalization due to limited supervised data. In this paper, we propose to learn protein-ligand binding representation in a self-supervised learning manner. Different from existing pre-training approaches which treat proteins and ligands individually, we emphasize to discern the intricate binding patterns from fine-grained interactions. Specifically, this self-supervised learning problem is formulated as a prediction of the conclusive binding complex structure given a pocket and ligand with a Transformer based interaction module, which naturally emulates the binding process. To ensure the representation of rich binding information, we introduce two pre-training tasks, i.e.&nbsp;atomic pairwise distance map prediction and mask ligand reconstruction, which comprehensively model the fine-grained interactions from both structure and feature space. Extensive experiments have demonstrated the superiority of our method across various binding tasks, including protein-ligand affinity prediction, virtual screening and protein-ligand docking. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16160v1-abstract-full').style.display = 'none'; document.getElementById('2311.16160v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12035">arXiv:2311.12035</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.12035">pdf</a>, <a href="https://arxiv.org/format/2311.12035">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Delta Score: Improving the Binding Assessment of Structure-Based Drug Design Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Ren%2C+M">Minsi Ren</a>, <a href="/search/q-bio?searchtype=author&amp;query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Qiang%2C+B">Bo Qiang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12035v1-abstract-short" style="display: inline;"> Structure-based drug design (SBDD) stands at the forefront of drug discovery, emphasizing the creation of molecules that target specific binding pockets. 
Recent advances in this area have witnessed the adoption of deep generative models and geometric deep learning techniques, modeling SBDD as a conditional generation task where the target structure serves as context. Historically, evaluation of th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12035v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12035v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12035v1-abstract-full" style="display: none;"> Structure-based drug design (SBDD) stands at the forefront of drug discovery, emphasizing the creation of molecules that target specific binding pockets. Recent advances in this area have witnessed the adoption of deep generative models and geometric deep learning techniques, modeling SBDD as a conditional generation task where the target structure serves as context. Historically, evaluation of these models centered on docking scores, which quantitatively depict the predicted binding affinity between a molecule and its target pocket. Though state-of-the-art models purport that a majority of their generated ligands exceed the docking score of ground truth ligands in test sets, it begs the question: Do these scores align with real-world biological needs? In this paper, we introduce the delta score, a novel evaluation metric grounded in tangible pharmaceutical requisites. Our experiments reveal that molecules produced by current deep generative models significantly lag behind ground truth reference ligands when assessed with the delta score. This novel metric not only complements existing benchmarks but also provides a pivotal direction for subsequent research in the domain. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12035v1-abstract-full').style.display = 'none'; document.getElementById('2311.12035v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.02124">arXiv:2311.02124</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.02124">pdf</a>, <a href="https://arxiv.org/format/2311.02124">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Sliced Denoising: A Physics-Informed Molecular Pre-Training Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+Z">Zhi-Ming Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.02124v1-abstract-short" style="display: inline;"> While molecular pre-training has shown great potential in enhancing drug discovery, the lack 
of a solid physical interpretation in current methods raises concerns about whether the learned representation truly captures the underlying explanatory factors in observed data, ultimately resulting in limited generalization and robustness. Although denoising methods offer a physical interpretation, their&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02124v1-abstract-full').style.display = 'inline'; document.getElementById('2311.02124v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.02124v1-abstract-full" style="display: none;"> While molecular pre-training has shown great potential in enhancing drug discovery, the lack of a solid physical interpretation in current methods raises concerns about whether the learned representation truly captures the underlying explanatory factors in observed data, ultimately resulting in limited generalization and robustness. Although denoising methods offer a physical interpretation, their accuracy is often compromised by ad-hoc noise design, leading to inaccurate learned force fields. To address this limitation, this paper proposes a new method for molecular pre-training, called sliced denoising (SliDe), which is based on the classical mechanical intramolecular potential theory. SliDe utilizes a novel noise strategy that perturbs bond lengths, angles, and torsion angles to achieve better sampling over conformations. Additionally, it introduces a random slicing approach that circumvents the computationally expensive calculation of the Jacobian matrix, which is otherwise essential for estimating the force field. By aligning with physical principles, SliDe shows a 42% improvement in the accuracy of estimated force fields compared to current state-of-the-art denoising methods, and thus outperforms traditional baselines on various molecular property prediction tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02124v1-abstract-full').style.display = 'none'; document.getElementById('2311.02124v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14216">arXiv:2310.14216</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.14216">pdf</a>, <a href="https://arxiv.org/format/2310.14216">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> UniMAP: Universal SMILES-Graph Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Yang%2C+L">Lixin Yang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Weiying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14216v2-abstract-short" style="display: inline;"> Molecular representation 
learning is fundamental for many drug related applications. Most existing molecular pre-training models are limited in using single molecular modality, either SMILES or graph representation. To effectively leverage both modalities, we argue that it is critical to capture the fine-grained &#39;semantics&#39; between SMILES and graph, because subtle sequence/graph differences may le&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14216v2-abstract-full').style.display = 'inline'; document.getElementById('2310.14216v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14216v2-abstract-full" style="display: none;"> Molecular representation learning is fundamental for many drug related applications. Most existing molecular pre-training models are limited in using single molecular modality, either SMILES or graph representation. To effectively leverage both modalities, we argue that it is critical to capture the fine-grained &#39;semantics&#39; between SMILES and graph, because subtle sequence/graph differences may lead to contrary molecular properties. In this paper, we propose a universal SMILE-graph representation learning model, namely UniMAP. Firstly, an embedding layer is employed to obtain the token and node/edge representation in SMILES and graph, respectively. A multi-layer Transformer is then utilized to conduct deep cross-modality fusion. Specially, four kinds of pre-training tasks are designed for UniMAP, including Multi-Level Cross-Modality Masking (CMM), SMILES-Graph Matching (SGM), Fragment-Level Alignment (FLA), and Domain Knowledge Learning (DKL). In this way, both global (i.e. SGM and DKL) and local (i.e. CMM and FLA) alignments are integrated to achieve comprehensive cross-modality fusion. We evaluate UniMAP on various downstream tasks, i.e. 
molecular property prediction, drug-target affinity prediction and drug-drug interaction. Experimental results show that UniMAP outperforms current state-of-the-art pre-training methods. We also visualize the learned representations to demonstrate the effect of multi-modality integration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14216v2-abstract-full').style.display = 'none'; document.getElementById('2310.14216v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.02510">arXiv:2308.02510</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.02510">pdf</a>, <a href="https://arxiv.org/format/2308.02510">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Seeing through the Brain: Image Reconstruction of Visual Perception from Human Brain Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yu-Ting Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ren%2C+K">Kan Ren</a>, <a href="/search/q-bio?searchtype=author&amp;query=Wang%2C+Y">Yansen Wang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zheng%2C+W">Wei-Long Zheng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Li%2C+D">Dongsheng Li</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lu%2C+B">Bao-Liang Lu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Qiu%2C+L">Lili Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.02510v2-abstract-short" style="display: inline;"> Seeing is believing, however, the underlying mechanism of how human visual perceptions are intertwined with our cognitions is still a mystery. Thanks to the recent advances in both neuroscience and artificial intelligence, we have been able to record the visually evoked brain activities and mimic the visual perception ability through computational approaches. In this paper, we pay attention to vis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02510v2-abstract-full').style.display = 'inline'; document.getElementById('2308.02510v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.02510v2-abstract-full" style="display: none;"> Seeing is believing, however, the underlying mechanism of how human visual perceptions are intertwined with our cognitions is still a mystery. Thanks to the recent advances in both neuroscience and artificial intelligence, we have been able to record the visually evoked brain activities and mimic the visual perception ability through computational approaches. 
In this paper, we pay attention to visual stimuli reconstruction by reconstructing the observed images based on portably accessible brain signals, i.e., electroencephalography (EEG) data. Since EEG signals are dynamic in the time-series format and are notorious to be noisy, processing and extracting useful information requires more dedicated efforts; In this paper, we propose a comprehensive pipeline, named NeuroImagen, for reconstructing visual stimuli images from EEG signals. Specifically, we incorporate a novel multi-level perceptual information decoding to draw multi-grained outputs from the given EEG data. A latent diffusion model will then leverage the extracted information to reconstruct the high-resolution visual stimuli images. The experimental results have illustrated the effectiveness of image reconstruction and superior quantitative performance of our proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02510v2-abstract-full').style.display = 'none'; document.getElementById('2308.02510v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A preprint version of an ongoing work</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.10683">arXiv:2307.10683</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.10683">pdf</a>, <a href="https://arxiv.org/format/2307.10683">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Fractional Denoising for 3D Molecular Pre-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+Z">Zhi-Ming Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.10683v3-abstract-short" style="display: inline;"> Coordinate denoising is a promising 3D molecular pre-training method, which has achieved remarkable performance in various downstream drug discovery tasks. Theoretically, the objective is equivalent to learning the force field, which is revealed helpful for downstream tasks. 
Nevertheless, there are two challenges for coordinate denoising to learn an effective force field, i.e. low coverage samples&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10683v3-abstract-full').style.display = 'inline'; document.getElementById('2307.10683v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.10683v3-abstract-full" style="display: none;"> Coordinate denoising is a promising 3D molecular pre-training method, which has achieved remarkable performance in various downstream drug discovery tasks. Theoretically, the objective is equivalent to learning the force field, which is revealed helpful for downstream tasks. Nevertheless, there are two challenges for coordinate denoising to learn an effective force field, i.e. low coverage samples and isotropic force field. The underlying reason is that molecular distributions assumed by existing denoising methods fail to capture the anisotropic characteristic of molecules. To tackle these challenges, we propose a novel hybrid noise strategy, including noises on both dihedral angle and coordinate. However, denoising such hybrid noise in a traditional way is no longer equivalent to learning the force field. Through theoretical deductions, we find that the problem is caused by the dependency of the input conformation for covariance. To this end, we propose to decouple the two types of noise and design a novel fractional denoising method (Frad), which only denoises the latter coordinate part. In this way, Frad enjoys both the merits of sampling more low-energy structures and the force field equivalence. Extensive experiments show the effectiveness of Frad in molecular representation, with a new state-of-the-art on 9 out of 12 tasks of QM9 and on 7 out of 8 targets of MD17. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10683v3-abstract-full').style.display = 'none'; document.getElementById('2307.10683v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.06235">arXiv:2307.06235</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.06235">pdf</a>, <a href="https://arxiv.org/format/2307.06235">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Molecular Pretraining via Modality Blending </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Yu%2C+Q">Qiying Yu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zhang%2C+Y">Yudi Zhang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ni%2C+Y">Yuyan Ni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Feng%2C+S">Shikun Feng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/q-bio?searchtype=author&amp;query=Liu%2C+J">Jingjing Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2307.06235v2-abstract-short" style="display: inline;"> Self-supervised learning has recently gained growing interest in molecular modeling for scientific tasks such as AI-assisted drug discovery. Current studies consider leveraging both 2D and 3D molecular structures for representation learning. However, relying on straightforward alignment strategies that treat each modality separately, these methods fail to exploit the intrinsic correlation between&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.06235v2-abstract-full').style.display = 'inline'; document.getElementById('2307.06235v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.06235v2-abstract-full" style="display: none;"> Self-supervised learning has recently gained growing interest in molecular modeling for scientific tasks such as AI-assisted drug discovery. Current studies consider leveraging both 2D and 3D molecular structures for representation learning. However, relying on straightforward alignment strategies that treat each modality separately, these methods fail to exploit the intrinsic correlation between 2D and 3D representations that reflect the underlying structural characteristics of molecules, and only perform coarse-grained molecule-level alignment. To derive fine-grained alignment and promote structural molecule understanding, we introduce an atomic-relation level &#34;blend-then-predict&#34; self-supervised learning approach, MoleBLEND, which first blends atom relations represented by different modalities into one unified relation matrix for joint encoding, then recovers modality-specific information for 2D and 3D structures individually. 
By treating atom relationships as anchors, MoleBLEND organically aligns and integrates visually dissimilar 2D and 3D modalities of the same molecule at fine-grained atomic level, painting a more comprehensive depiction of each molecule. Extensive experiments show that MoleBLEND achieves state-of-the-art performance across major 2D/3D molecular benchmarks. We further provide theoretical insights from the perspective of mutual-information maximization, demonstrating that our method unifies contrastive, generative (cross-modality prediction) and mask-then-predict (single-modality prediction) objectives into one single cohesive framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.06235v2-abstract-full').style.display = 'none'; document.getElementById('2307.06235v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13266">arXiv:2305.13266</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.13266">pdf</a>, <a href="https://arxiv.org/format/2305.13266">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Coarse-to-Fine: a Hierarchical Diffusion Model for Molecule Generation in 3D </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Qiang%2C+B">Bo Qiang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y">Yuxuan Song</a>, <a href="/search/q-bio?searchtype=author&amp;query=Xu%2C+M">Minkai Xu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Gong%2C+J">Jingjing Gong</a>, <a href="/search/q-bio?searchtype=author&amp;query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Weiying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13266v2-abstract-short" style="display: inline;"> Generating desirable molecular structures in 3D is a fundamental problem for drug discovery. 
Despite the considerable progress we have achieved, existing methods usually generate molecules in atom resolution and ignore intrinsic local structures such as rings, which leads to poor quality in generated structures, especially when generating large molecules. Fragment-based molecule generation is a pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13266v2-abstract-full').style.display = 'inline'; document.getElementById('2305.13266v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13266v2-abstract-full" style="display: none;"> Generating desirable molecular structures in 3D is a fundamental problem for drug discovery. Despite the considerable progress we have achieved, existing methods usually generate molecules in atom resolution and ignore intrinsic local structures such as rings, which leads to poor quality in generated structures, especially when generating large molecules. Fragment-based molecule generation is a promising strategy, however, it is nontrivial to be adapted for 3D non-autoregressive generations because of the combinational optimization problems. In this paper, we utilize a coarse-to-fine strategy to tackle this problem, in which a Hierarchical Diffusion-based model (i.e.~HierDiff) is proposed to preserve the validity of local segments without relying on autoregressive modeling. Specifically, HierDiff first generates coarse-grained molecule geometries via an equivariant diffusion process, where each coarse-grained node reflects a fragment in a molecule. Then the coarse-grained nodes are decoded into fine-grained fragments by a message-passing process and a newly designed iterative refined sampling module. Lastly, the fine-grained fragments are then assembled to derive a complete atomic molecular structure. 
Extensive experiments demonstrate that HierDiff consistently improves the quality of molecule generation over existing methods <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13266v2-abstract-full').style.display = 'none'; document.getElementById('2305.13266v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2023 poster</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.01978">arXiv:2211.01978</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.01978">pdf</a>, <a href="https://arxiv.org/format/2211.01978">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3511808.3557142">10.1145/3511808.3557142 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PEMP: Leveraging Physics Properties to Enhance Molecular Property Prediction </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Sun%2C+Y">Yuancheng Sun</a>, <a href="/search/q-bio?searchtype=author&amp;query=Chen%2C+Y">Yimeng Chen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Weizhi Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Huang%2C+W">Wenhao Huang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Liu%2C+K">Kang Liu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+Z">Zhiming Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.01978v1-abstract-short" style="display: inline;"> Molecular property prediction is essential for drug discovery. In recent years, deep learning methods have been introduced to this area and achieved state-of-the-art performances. However, most of existing methods ignore the intrinsic relations between molecular properties which can be utilized to improve the performances of corresponding prediction tasks. In this paper, we propose a new approach,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.01978v1-abstract-full').style.display = 'inline'; document.getElementById('2211.01978v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.01978v1-abstract-full" style="display: none;"> Molecular property prediction is essential for drug discovery. In recent years, deep learning methods have been introduced to this area and achieved state-of-the-art performances. 
However, most of existing methods ignore the intrinsic relations between molecular properties which can be utilized to improve the performances of corresponding prediction tasks. In this paper, we propose a new approach, namely Physics properties Enhanced Molecular Property prediction (PEMP), to utilize relations between molecular properties revealed by previous physics theory and physical chemistry studies. Specifically, we enhance the training of the chemical and physiological property predictors with related physics property prediction tasks. We design two different methods for PEMP, respectively based on multi-task learning and transfer learning. Both methods include a model-agnostic molecule representation module and a property prediction module. In our implementation, we adopt both the state-of-the-art molecule embedding models under the supervised learning paradigm and the pretraining paradigm as the molecule representation module of PEMP, respectively. Experimental results on public benchmark MoleculeNet show that the proposed methods have the ability to outperform corresponding state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.01978v1-abstract-full').style.display = 'none'; document.getElementById('2211.01978v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages. 
Published in CIKM 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.05231">arXiv:2110.05231</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.05231">pdf</a>, <a href="https://arxiv.org/format/2110.05231">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-modal Self-supervised Pre-training for Regulatory Genome Across Cell Types </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Mo%2C+S">Shentong Mo</a>, <a href="/search/q-bio?searchtype=author&amp;query=Fu%2C+X">Xi Fu</a>, <a href="/search/q-bio?searchtype=author&amp;query=Hong%2C+C">Chenyang Hong</a>, <a href="/search/q-bio?searchtype=author&amp;query=Chen%2C+Y">Yizhen Chen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Zheng%2C+Y">Yuxuan Zheng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Tang%2C+X">Xiangru Tang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Shen%2C+Z">Zhiqiang Shen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Xing%2C+E+P">Eric P Xing</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.05231v2-abstract-short" style="display: inline;"> In the genome biology research, regulatory genome modeling is an important topic for many regulatory downstream tasks, such as promoter classification, transcription factor binding sites 
prediction. The core problem is to model how regulatory elements interact with each other and its variability across different cell types. However, current deep learning methods often focus on modeling genome sequen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.05231v2-abstract-full').style.display = 'inline'; document.getElementById('2110.05231v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.05231v2-abstract-full" style="display: none;"> In the genome biology research, regulatory genome modeling is an important topic for many regulatory downstream tasks, such as promoter classification, transcription factor binding sites prediction. The core problem is to model how regulatory elements interact with each other and its variability across different cell types. However, current deep learning methods often focus on modeling genome sequences of a fixed set of cell types and do not account for the interaction between multiple regulatory elements, making them only perform well on the cell types in the training set and lack the generalizability required in biological applications. In this work, we propose a simple yet effective approach for pre-training genome data in a multi-modal and self-supervised manner, which we call GeneBERT. Specifically, we simultaneously take the 1d sequence of genome data and a 2d matrix of (transcription factors x regions) as the input, where three pre-training tasks are proposed to improve the robustness and generalizability of our model. We pre-train our model on the ATAC-seq dataset with 17 million genome sequences. We evaluate our GeneBERT on regulatory downstream tasks across different cell types, including promoter classification, transcription factor binding sites prediction, disease risk estimation, and splicing sites prediction. 
Extensive experiments demonstrate the effectiveness of multi-modal and self-supervised pre-training for large-scale regulatory genomics data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.05231v2-abstract-full').style.display = 'none'; document.getElementById('2110.05231v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.06395">arXiv:2108.06395</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2108.06395">pdf</a>, <a href="https://arxiv.org/ps/2108.06395">ps</a>, <a href="https://arxiv.org/format/2108.06395">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chaotic Dynamics">nlin.CD</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PhysRevE.105.L052201">10.1103/PhysRevE.105.L052201 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Criticality in Reservoir Computer of Coupled Phase Oscillators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Wang%2C+L">Liang Wang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Fan%2C+H">Huawei Fan</a>, <a 
href="/search/q-bio?searchtype=author&amp;query=Xiao%2C+J">Jinghua Xiao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yueheng Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Wang%2C+X">Xingang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.06395v1-abstract-short" style="display: inline;"> Accumulating evidences show that the cerebral cortex is operating near a critical state featured by power-law size distribution of neural avalanche activities, yet evidence of this critical state in artificial neural networks mimicking the cerebral cortex is lacking. Here we design an artificial neural network of coupled phase oscillators and, by the technique of reservoir computing in machine lea&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.06395v1-abstract-full').style.display = 'inline'; document.getElementById('2108.06395v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.06395v1-abstract-full" style="display: none;"> Accumulating evidences show that the cerebral cortex is operating near a critical state featured by power-law size distribution of neural avalanche activities, yet evidence of this critical state in artificial neural networks mimicking the cerebral cortex is lacking. Here we design an artificial neural network of coupled phase oscillators and, by the technique of reservoir computing in machine learning, train it for predicting chaos. It is found that when the machine is properly trained, oscillators in the reservoir are synchronized into clusters whose sizes follow a power-law distribution. This feature, however, is absent when the machine is poorly trained. 
Additionally, it is found that despite the synchronization degree of the original network, once properly trained, the reservoir network is always developed to the same critical state, exemplifying the &#34;attractor&#34; nature of this state in machine learning. The generality of the results is verified in different reservoir models and by different target systems, and it is found that the scaling exponent of the distribution is independent on the reservoir details and the bifurcation parameter of the target system, but is modified when the dynamics of the target system is changed to a different type. The findings shed lights on the nature of machine learning, and are helpful to the design of high-performance machine in physical systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.06395v1-abstract-full').style.display = 'none'; document.getElementById('2108.06395v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1505.07526">arXiv:1505.07526</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1505.07526">pdf</a>, <a href="https://arxiv.org/format/1505.07526">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Subcellular Processes">q-bio.SC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1088/1742-5468/2015/00/P07025">10.1088/1742-5468/2015/00/P07025 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Energy dissipation in an adaptive molecular circuit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Wang%2C+S">Shou-Wen Wang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yueheng Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Tang%2C+L">Lei-Han Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1505.07526v1-abstract-short" style="display: inline;"> The ability to monitor nutrient and other environmental conditions with high sensitivity is crucial for cell growth and survival. 
Sensory adaptation allows a cell to recover its sensitivity after a transient response to a shift in the strength of extracellular stimulus. The working principles of adaptation have been established previously based on rate equations which do not consider fluctuations&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.07526v1-abstract-full').style.display = 'inline'; document.getElementById('1505.07526v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1505.07526v1-abstract-full" style="display: none;"> The ability to monitor nutrient and other environmental conditions with high sensitivity is crucial for cell growth and survival. Sensory adaptation allows a cell to recover its sensitivity after a transient response to a shift in the strength of extracellular stimulus. The working principles of adaptation have been established previously based on rate equations which do not consider fluctuations in a thermal environment. Recently, G. Lan et al. (Nature Phys., 8:422-8, 2012) performed a detailed analysis of a stochastic model for the E. coli sensory network. They showed that accurate adaptation is possible only when the system operates in a nonequilibrium steady-state (NESS). They further proposed an energy-speed-accuracy (ESA) trade-off relation. We present here analytic results on the NESS of the model through a mapping to a one-dimensional birth-death process. An exact expression for the entropy production rate is also derived. Based on these results, we are able to discuss the ESA relation in a more general setting. Our study suggests that the adaptation error can be reduced exponentially as the methylation range increases. Finally, we show that a nonequilibrium phase transition exists in the infinite methylation range limit, despite the fact that the model contains only two discrete variables. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.07526v1-abstract-full').style.display = 'none'; document.getElementById('1505.07526v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> J. Stat. Mech. Theor. Exp. 2015, P07025 (2015) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1407.0387">arXiv:1407.0387</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1407.0387">pdf</a>, <a href="https://arxiv.org/format/1407.0387">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btu320">10.1093/bioinformatics/btu320 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SEK: Sparsity exploiting $k$-mer-based estimation of bacterial community composition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Chatterjee%2C+S">Saikat Chatterjee</a>, <a href="/search/q-bio?searchtype=author&amp;query=Koslicki%2C+D">David Koslicki</a>, <a href="/search/q-bio?searchtype=author&amp;query=Dong%2C+S">Siyuan Dong</a>, <a href="/search/q-bio?searchtype=author&amp;query=Innocenti%2C+N">Nicolas Innocenti</a>, <a 
href="/search/q-bio?searchtype=author&amp;query=Cheng%2C+L">Lu Cheng</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yueheng Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Vehkaper%C3%A4%2C+M">Mikko Vehkaperä</a>, <a href="/search/q-bio?searchtype=author&amp;query=Skoglund%2C+M">Mikael Skoglund</a>, <a href="/search/q-bio?searchtype=author&amp;query=Rasmussen%2C+L+K">Lars K. Rasmussen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Aurell%2C+E">Erik Aurell</a>, <a href="/search/q-bio?searchtype=author&amp;query=Corander%2C+J">Jukka Corander</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1407.0387v1-abstract-short" style="display: inline;"> Motivation: Estimation of bacterial community composition from a high-throughput sequenced sample is an important task in metagenomics applications. Since the sample sequence data typically harbors reads of variable lengths and different levels of biological and technical noise, accurate statistical analysis of such data is challenging. Currently popular estimation methods are typically very time&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1407.0387v1-abstract-full').style.display = 'inline'; document.getElementById('1407.0387v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1407.0387v1-abstract-full" style="display: none;"> Motivation: Estimation of bacterial community composition from a high-throughput sequenced sample is an important task in metagenomics applications. Since the sample sequence data typically harbors reads of variable lengths and different levels of biological and technical noise, accurate statistical analysis of such data is challenging. 
Currently popular estimation methods are typically very time consuming in a desktop computing environment. Results: Using sparsity enforcing methods from the general sparse signal processing field (such as compressed sensing), we derive a solution to the community composition estimation problem by a simultaneous assignment of all sample reads to a pre-processed reference database. A general statistical model based on kernel density estimation techniques is introduced for the assignment task and the model solution is obtained using convex optimization tools. Further, we design a greedy algorithm solution for a fast solution. Our approach offers a reasonably fast community composition estimation method which is shown to be more robust to input data variation than a recently introduced related method. Availability: A platform-independent Matlab implementation of the method is freely available at http://www.ee.kth.se/ctsoftware; source code that does not require access to Matlab is currently being tested and will be made available later through the above website. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1407.0387v1-abstract-full').style.display = 'none'; document.getElementById('1407.0387v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2014. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1211.1281">arXiv:1211.1281</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1211.1281">pdf</a>, <a href="https://arxiv.org/ps/1211.1281">ps</a>, <a href="https://arxiv.org/format/1211.1281">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Disordered Systems and Neural Networks">cond-mat.dis-nn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PhysRevE.87.012707">10.1103/PhysRevE.87.012707 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Improved contact prediction in proteins: Using pseudolikelihoods to infer Potts models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Ekeberg%2C+M">Magnus Ekeberg</a>, <a href="/search/q-bio?searchtype=author&amp;query=L%C3%B6vkvist%2C+C">Cecilia Lövkvist</a>, <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yueheng Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Weigt%2C+M">Martin Weigt</a>, <a 
href="/search/q-bio?searchtype=author&amp;query=Aurell%2C+E">Erik Aurell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1211.1281v2-abstract-short" style="display: inline;"> Spatially proximate amino acids in a protein tend to coevolve. A protein&#39;s three-dimensional (3D) structure hence leaves an echo of correlations in the evolutionary record. Reverse engineering 3D structures from such correlations is an open problem in structural biology, pursued with increasing vigor as more and more protein sequences continue to fill the data banks. Within this task lies a statis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1211.1281v2-abstract-full').style.display = 'inline'; document.getElementById('1211.1281v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1211.1281v2-abstract-full" style="display: none;"> Spatially proximate amino acids in a protein tend to coevolve. A protein&#39;s three-dimensional (3D) structure hence leaves an echo of correlations in the evolutionary record. Reverse engineering 3D structures from such correlations is an open problem in structural biology, pursued with increasing vigor as more and more protein sequences continue to fill the data banks. Within this task lies a statistical inference problem, rooted in the following: correlation between two sites in a protein sequence can arise from firsthand interaction but can also be network-propagated via intermediate sites; observed correlation is not enough to guarantee proximity. To separate direct from indirect interactions is an instance of the general problem of inverse statistical mechanics, where the task is to learn model parameters (fields, couplings) from observables (magnetizations, correlations, samples) in large systems. 
In the context of protein sequences, the approach has been referred to as direct-coupling analysis. Here we show that the pseudolikelihood method, applied to 21-state Potts models describing the statistical properties of families of evolutionarily related proteins, significantly outperforms existing approaches to the direct-coupling analysis, the latter being based on standard mean-field techniques. This improved performance also relies on a modified score for the coupling strength. The results are verified using known crystal structures of specific sequence instances of various protein families. Code implementing the new method can be found at http://plmdca.csc.kth.se/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1211.1281v2-abstract-full').style.display = 'none'; document.getElementById('1211.1281v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2013; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 16 figures, published version</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> M. Ekeberg, C. Lövkvist, Y. Lan, M. Weigt, E. Aurell, Improved contact prediction in proteins: Using pseudolikelihoods to infer Potts models, Phys. Rev. 
E 87, 012707 (2013) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/q-bio/0607028">arXiv:q-bio/0607028</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/q-bio/0607028">pdf</a>, <a href="https://arxiv.org/ps/q-bio/0607028">ps</a>, <a href="https://arxiv.org/format/q-bio/0607028">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1063/1.2358342">10.1063/1.2358342 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The interplay between discrete noise and nonlinear chemical kinetics in a signal amplification cascade </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yueheng Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Papoian%2C+G+A">Garegin A. Papoian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="q-bio/0607028v1-abstract-short" style="display: inline;"> We used various analytical and numerical techniques to elucidate signal propagation in a small enzymatic cascade which is subjected to external and internal noise. 
The nonlinear character of catalytic reactions, which underlie protein signal transduction cascades, renders stochastic signaling dynamics in cytosol biochemical networks distinct from the usual description of stochastic dynamics in g&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('q-bio/0607028v1-abstract-full').style.display = 'inline'; document.getElementById('q-bio/0607028v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="q-bio/0607028v1-abstract-full" style="display: none;"> We used various analytical and numerical techniques to elucidate signal propagation in a small enzymatic cascade which is subjected to external and internal noise. The nonlinear character of catalytic reactions, which underlie protein signal transduction cascades, renders stochastic signaling dynamics in cytosol biochemical networks distinct from the usual description of stochastic dynamics in gene regulatory networks. For a simple 2-step enzymatic cascade which underlies many important protein signaling pathways, we demonstrated that the commonly used techniques such as the linear noise approximation and the Langevin equation become inadequate when the number of proteins becomes too low. Consequently, we developed a new analytical approximation, based on mixing the generating function and distribution function approaches, to the solution of the master equation that describes nonlinear chemical signaling kinetics for this important class of biochemical reactions. Our techniques work in a much wider range of protein number fluctuations than the methods used previously. We found that under certain conditions the burst-phase noise may be injected into the downstream signaling network dynamics, resulting possibly in unusually large macroscopic fluctuations. 
In addition to computing first and second moments, which is the goal of commonly used analytical techniques, our new approach provides the full time-dependent probability distributions of the colored non-Gaussian processes in a nonlinear signal transduction cascade. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('q-bio/0607028v1-abstract-full').style.display = 'none'; document.getElementById('q-bio/0607028v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2006; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2006. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/q-bio/0607025">arXiv:q-bio/0607025</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/q-bio/0607025">pdf</a>, <a href="https://arxiv.org/ps/q-bio/0607025">ps</a>, <a href="https://arxiv.org/format/q-bio/0607025">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1063/1.2353835">10.1063/1.2353835 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A variational approach to the stochastic aspects of cellular signal transduction </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Lan%2C+Y">Yueheng Lan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Wolynes%2C+P+G">Peter G. Wolynes</a>, <a href="/search/q-bio?searchtype=author&amp;query=Papoian%2C+G+A">Garegin A. Papoian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="q-bio/0607025v1-abstract-short" style="display: inline;"> Cellular signaling networks have evolved to cope with intrinsic fluctuations, coming from the small numbers of constituents, and the environmental noise. Stochastic chemical kinetics equations govern the way biochemical networks process noisy signals. The essential difficulty associated with the master equation approach to solving the stochastic chemical kinetics problem is the enormous number o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('q-bio/0607025v1-abstract-full').style.display = 'inline'; document.getElementById('q-bio/0607025v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="q-bio/0607025v1-abstract-full" style="display: none;"> Cellular signaling networks have evolved to cope with intrinsic fluctuations, coming from the small numbers of constituents, and the environmental noise. Stochastic chemical kinetics equations govern the way biochemical networks process noisy signals. The essential difficulty associated with the master equation approach to solving the stochastic chemical kinetics problem is the enormous number of ordinary differential equations involved. In this work, we show how to achieve tremendous reduction in the dimensionality of specific reaction cascade dynamics by solving variationally an equivalent quantum field theoretic formulation of stochastic chemical kinetics. 
The present formulation avoids cumbersome commutator computations in the derivation of evolution equations, making more transparent the physical significance of the variational method. We propose novel time-dependent basis functions which work well over a wide range of rate parameters. We apply the new basis functions to describe stochastic signaling in several enzymatic cascades and compare the results so obtained with those from alternative solution techniques. The variational ansatz gives probability distributions that agree well with the exact ones, even when fluctuations are large and discreteness and nonlinearity are important. A numerical implementation of our technique is many orders of magnitude more efficient computationally compared with the traditional Monte Carlo simulation algorithms or the Langevin simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('q-bio/0607025v1-abstract-full').style.display = 'none'; document.getElementById('q-bio/0607025v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2006; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2006. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 11 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> J. Chem. Phys. 
125, 124106 (2006) </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> 
<div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 
47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10