CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 51 results for author: <span class="mathjax">Sultan, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Sultan%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Sultan, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Sultan%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Sultan, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Sultan%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Sultan%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Sultan%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17621">arXiv:2411.17621</a> <span> [<a href="https://arxiv.org/pdf/2411.17621">pdf</a>, <a href="https://arxiv.org/format/2411.17621">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> A Combined Feature Embedding Tools for Multi-Class Software Defect and Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+F">Md. 
Fahim Sultan</a>, <a href="/search/cs?searchtype=author&query=Karim%2C+T">Tasmin Karim</a>, <a href="/search/cs?searchtype=author&query=Shaon%2C+M+S+H">Md. Shazzad Hossain Shaon</a>, <a href="/search/cs?searchtype=author&query=Wardat%2C+M">Mohammad Wardat</a>, <a href="/search/cs?searchtype=author&query=Akter%2C+M+S">Mst Shapna Akter</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17621v2-abstract-short" style="display: inline;"> In software, a vulnerability is a defect in a program that attackers might utilize to acquire unauthorized access, alter system functions, and acquire information. These vulnerabilities arise from programming faults, design flaws, incorrect setups, and a lack of security protective measures. To mitigate these vulnerabilities, regular software upgrades, code reviews, safe development techniques, an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17621v2-abstract-full').style.display = 'inline'; document.getElementById('2411.17621v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17621v2-abstract-full" style="display: none;"> In software, a vulnerability is a defect in a program that attackers might utilize to acquire unauthorized access, alter system functions, and acquire information. These vulnerabilities arise from programming faults, design flaws, incorrect setups, and a lack of security protective measures. To mitigate these vulnerabilities, regular software upgrades, code reviews, safe development techniques, and the use of security tools to find and fix problems have been important. Several ways have been delivered in recent studies to address difficulties related to software vulnerabilities. 
However, previous approaches have significant limitations, notably in feature embedding and precisely recognizing specific vulnerabilities. To overcome these drawbacks, we present CodeGraphNet, an experimental method that combines GraphCodeBERT and Graph Convolutional Network (GCN) approaches, where, CodeGraphNet reveals data in a high-dimensional vector space, with comparable or related properties grouped closer together. This method captures intricate relationships between features, providing for more exact identification and separation of vulnerabilities. Using this feature embedding approach, we employed four machine learning models, applying both independent testing and 10-fold cross-validation. The DeepTree model, which is a hybrid of a Decision Tree and a Neural Network, outperforms state-of-the-art approaches. In additional validation, we evaluated our model using feature embeddings from LSA, GloVe, FastText, CodeBERT and GraphCodeBERT, and found that the CodeGraphNet method presented improved vulnerability identification with 98% of accuracy. Our model was tested on a real-time dataset to determine its capacity to handle real-world data and to focus on defect localization, which might influence future studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17621v2-abstract-full').style.display = 'none'; document.getElementById('2411.17621v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10878">arXiv:2411.10878</a> <span> [<a href="https://arxiv.org/pdf/2411.10878">pdf</a>, <a href="https://arxiv.org/format/2411.10878">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Empowering Meta-Analysis: Leveraging Large Language Models for Scientific Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahad%2C+J+I">Jawad Ibn Ahad</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+R+M">Rafeed Mohammad Sultan</a>, <a href="/search/cs?searchtype=author&query=Kaikobad%2C+A">Abraham Kaikobad</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+F">Fuad Rahman</a>, <a href="/search/cs?searchtype=author&query=Amin%2C+M+R">Mohammad Ruhul Amin</a>, <a href="/search/cs?searchtype=author&query=Mohammed%2C+N">Nabeel Mohammed</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+S">Shafin Rahman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10878v1-abstract-short" style="display: inline;"> This study investigates the automation of meta-analysis in scientific documents using large language models (LLMs). Meta-analysis is a robust statistical method that synthesizes the findings of multiple studies support articles to provide a comprehensive understanding. We know that a meta-article provides a structured analysis of several articles. 
However, conducting meta-analysis by hand is labor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10878v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10878v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10878v1-abstract-full" style="display: none;"> This study investigates the automation of meta-analysis in scientific documents using large language models (LLMs). Meta-analysis is a robust statistical method that synthesizes the findings of multiple studies support articles to provide a comprehensive understanding. We know that a meta-article provides a structured analysis of several articles. However, conducting meta-analysis by hand is labor-intensive, time-consuming, and susceptible to human error, highlighting the need for automated pipelines to streamline the process. Our research introduces a novel approach that fine-tunes the LLM on extensive scientific datasets to address challenges in big data handling and structured data extraction. We automate and optimize the meta-analysis process by integrating Retrieval Augmented Generation (RAG). Tailored through prompt engineering and a new loss metric, Inverse Cosine Distance (ICD), designed for fine-tuning on large contextual datasets, LLMs efficiently generate structured meta-analysis content. Human evaluation then assesses relevance and provides information on model performance in key metrics. This research demonstrates that fine-tuned models outperform non-fine-tuned models, with fine-tuned LLMs generating 87.6% relevant meta-analysis abstracts. The relevance of the context, based on human evaluation, shows a reduction in irrelevancy from 4.56% to 1.9%. These experiments were conducted in a low-resource environment, highlighting the study's contribution to enhancing the efficiency and reliability of meta-analysis automation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10878v1-abstract-full').style.display = 'none'; document.getElementById('2411.10878v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in 2024 IEEE International Conference on Big Data (IEEE BigData)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08938">arXiv:2410.08938</a> <span> [<a href="https://arxiv.org/pdf/2410.08938">pdf</a>, <a href="https://arxiv.org/format/2410.08938">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> KinDEL: DNA-Encoded Library Dataset for Kinase Inhibitors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+B">Benson Chen</a>, <a href="/search/cs?searchtype=author&query=Danel%2C+T">Tomasz Danel</a>, <a href="/search/cs?searchtype=author&query=McEnaney%2C+P+J">Patrick J. McEnaney</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+N">Nikhil Jain</a>, <a href="/search/cs?searchtype=author&query=Novikov%2C+K">Kirill Novikov</a>, <a href="/search/cs?searchtype=author&query=Akki%2C+S+U">Spurti Umesh Akki</a>, <a href="/search/cs?searchtype=author&query=Turnbull%2C+J+L">Joshua L. 
Turnbull</a>, <a href="/search/cs?searchtype=author&query=Pandya%2C+V+A">Virja Atul Pandya</a>, <a href="/search/cs?searchtype=author&query=Belotserkovskii%2C+B+P">Boris P. Belotserkovskii</a>, <a href="/search/cs?searchtype=author&query=Weaver%2C+J+B">Jared Bryce Weaver</a>, <a href="/search/cs?searchtype=author&query=Biswas%2C+A">Ankita Biswas</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+D">Dat Nguyen</a>, <a href="/search/cs?searchtype=author&query=Dreiman%2C+G+H+S">Gabriel H. S. Dreiman</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mohammad Sultan</a>, <a href="/search/cs?searchtype=author&query=Stanley%2C+N">Nathaniel Stanley</a>, <a href="/search/cs?searchtype=author&query=Whalen%2C+D+M">Daniel M Whalen</a>, <a href="/search/cs?searchtype=author&query=Kanichar%2C+D">Divya Kanichar</a>, <a href="/search/cs?searchtype=author&query=Klein%2C+C">Christoph Klein</a>, <a href="/search/cs?searchtype=author&query=Fox%2C+E">Emily Fox</a>, <a href="/search/cs?searchtype=author&query=Watts%2C+R+E">R. Edward Watts</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08938v1-abstract-short" style="display: inline;"> DNA-Encoded Libraries (DEL) are combinatorial small molecule libraries that offer an efficient way to characterize diverse chemical spaces. Selection experiments using DELs are pivotal to drug discovery efforts, enabling high-throughput screens for hit finding. However, limited availability of public DEL datasets hinders the advancement of computational techniques designed to process such data. 
To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08938v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08938v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08938v1-abstract-full" style="display: none;"> DNA-Encoded Libraries (DEL) are combinatorial small molecule libraries that offer an efficient way to characterize diverse chemical spaces. Selection experiments using DELs are pivotal to drug discovery efforts, enabling high-throughput screens for hit finding. However, limited availability of public DEL datasets hinders the advancement of computational techniques designed to process such data. To bridge this gap, we present KinDEL, one of the first large, publicly available DEL datasets on two kinases: Mitogen-Activated Protein Kinase 14 (MAPK14) and Discoidin Domain Receptor Tyrosine Kinase 1 (DDR1). Interest in this data modality is growing due to its ability to generate extensive supervised chemical data that densely samples around select molecular structures. Demonstrating one such application of the data, we benchmark different machine learning techniques to develop predictive models for hit identification; in particular, we highlight recent structure-based probabilistic approaches. Finally, we provide biophysical assay data, both on- and off-DNA, to validate our models on a smaller subset of molecules. Data and code for our benchmarks can be found at: https://github.com/insitro/kindel. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08938v1-abstract-full').style.display = 'none'; document.getElementById('2410.08938v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11879">arXiv:2408.11879</a> <span> [<a href="https://arxiv.org/pdf/2408.11879">pdf</a>, <a href="https://arxiv.org/format/2408.11879">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Beyond Labels: Aligning Large Language Models with Human-like Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kabir%2C+M+R">Muhammad Rafsan Kabir</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+R+M">Rafeed Mohammad Sultan</a>, <a href="/search/cs?searchtype=author&query=Asif%2C+I+H">Ihsanul Haque Asif</a>, <a href="/search/cs?searchtype=author&query=Ahad%2C+J+I">Jawad Ibn Ahad</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+F">Fuad Rahman</a>, <a href="/search/cs?searchtype=author&query=Amin%2C+M+R">Mohammad Ruhul Amin</a>, <a href="/search/cs?searchtype=author&query=Mohammed%2C+N">Nabeel Mohammed</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+S">Shafin Rahman</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11879v1-abstract-short" style="display: inline;"> Aligning large language models (LLMs) with a human reasoning approach ensures that LLMs produce morally correct and human-like decisions. Ethical concerns are raised because current models are prone to generating false positives and providing malicious responses. To contribute to this issue, we have curated an ethics dataset named Dataset for Aligning Reasons (DFAR), designed to aid in aligning la… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11879v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11879v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11879v1-abstract-full" style="display: none;"> Aligning large language models (LLMs) with a human reasoning approach ensures that LLMs produce morally correct and human-like decisions. Ethical concerns are raised because current models are prone to generating false positives and providing malicious responses. To contribute to this issue, we have curated an ethics dataset named Dataset for Aligning Reasons (DFAR), designed to aid in aligning language models to generate human-like reasons. The dataset comprises statements with ethical-unethical labels and their corresponding reasons. In this study, we employed a unique and novel fine-tuning approach that utilizes ethics labels and their corresponding reasons (L+R), in contrast to the existing fine-tuning approach that only uses labels (L). The original pre-trained versions, the existing fine-tuned versions, and our proposed fine-tuned versions of LLMs were then evaluated on an ethical-unethical classification task and a reason-generation task. 
Our proposed fine-tuning strategy notably outperforms the others in both tasks, achieving significantly higher accuracy scores in the classification task and lower misalignment rates in the reason-generation task. The increase in classification accuracies and decrease in misalignment rates indicate that the L+R fine-tuned models align more with human ethics. Hence, this study illustrates that injecting reasons has substantially improved the alignment of LLMs, resulting in more human-like responses. We have made the DFAR dataset and corresponding codes publicly available at https://github.com/apurba-nsu-rnd-lab/DFAR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11879v1-abstract-full').style.display = 'none'; document.getElementById('2408.11879v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in ICPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08388">arXiv:2408.08388</a> <span> [<a href="https://arxiv.org/pdf/2408.08388">pdf</a>, <a href="https://arxiv.org/format/2408.08388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Classification of High-dimensional Time Series in Spectral Domain using Explainable Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Roy%2C+S">Sarbojit Roy</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+S">Malik Shahid Sultan</a>, <a href="/search/cs?searchtype=author&query=Ombao%2C+H">Hernando Ombao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08388v1-abstract-short" style="display: inline;"> Interpretable classification of time series presents significant challenges in high dimensions. Traditional feature selection methods in the frequency domain often assume sparsity in spectral density matrices (SDMs) or their inverses, which can be restrictive for real-world applications. 
In this article, we propose a model-based approach for classifying high-dimensional stationary time series by a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08388v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08388v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08388v1-abstract-full" style="display: none;"> Interpretable classification of time series presents significant challenges in high dimensions. Traditional feature selection methods in the frequency domain often assume sparsity in spectral density matrices (SDMs) or their inverses, which can be restrictive for real-world applications. In this article, we propose a model-based approach for classifying high-dimensional stationary time series by assuming sparsity in the difference between inverse SDMs. Our approach emphasizes the interpretability of model parameters, making it especially suitable for fields like neuroscience, where understanding differences in brain network connectivity across various states is crucial. The estimators for model parameters demonstrate consistency under appropriate conditions. We further propose using standard deep learning optimizers for parameter estimation, employing techniques such as mini-batching and learning rate scheduling. Additionally, we introduce a method to screen the most discriminatory frequencies for classification, which exhibits the sure screening property under general conditions. The flexibility of the proposed model allows the significance of covariates to vary across frequencies, enabling nuanced inferences and deeper insights into the underlying problem. The novelty of our method lies in the interpretability of the model parameters, addressing critical needs in neuroscience. The proposed approaches have been evaluated on simulated examples and the `Alert-vs-Drowsy' EEG dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08388v1-abstract-full').style.display = 'none'; document.getElementById('2408.08388v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07254">arXiv:2407.07254</a> <span> [<a href="https://arxiv.org/pdf/2407.07254">pdf</a>, <a href="https://arxiv.org/format/2407.07254">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HAMIL-QA: Hierarchical Approach to Multiple Instance Learning for Atrial LGE MRI Quality Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+K+M+A">K M Arefeen Sultan</a>, <a href="/search/cs?searchtype=author&query=Hisham%2C+M+H+H">Md Hasibul Husain Hisham</a>, <a href="/search/cs?searchtype=author&query=Orkild%2C+B">Benjamin Orkild</a>, <a href="/search/cs?searchtype=author&query=Morris%2C+A">Alan Morris</a>, <a href="/search/cs?searchtype=author&query=Kholmovski%2C+E">Eugene Kholmovski</a>, <a href="/search/cs?searchtype=author&query=Bieging%2C+E">Erik Bieging</a>, <a href="/search/cs?searchtype=author&query=Kwan%2C+E">Eugene Kwan</a>, <a href="/search/cs?searchtype=author&query=Ranjan%2C+R">Ravi Ranjan</a>, <a href="/search/cs?searchtype=author&query=DiBella%2C+E">Ed DiBella</a>, <a 
href="/search/cs?searchtype=author&query=Elhabian%2C+S">Shireen Elhabian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07254v1-abstract-short" style="display: inline;"> The accurate evaluation of left atrial fibrosis via high-quality 3D Late Gadolinium Enhancement (LGE) MRI is crucial for atrial fibrillation management but is hindered by factors like patient movement and imaging variability. The pursuit of automated LGE MRI quality assessment is critical for enhancing diagnostic accuracy, standardizing evaluations, and improving patient outcomes. The deep learnin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07254v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07254v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07254v1-abstract-full" style="display: none;"> The accurate evaluation of left atrial fibrosis via high-quality 3D Late Gadolinium Enhancement (LGE) MRI is crucial for atrial fibrillation management but is hindered by factors like patient movement and imaging variability. The pursuit of automated LGE MRI quality assessment is critical for enhancing diagnostic accuracy, standardizing evaluations, and improving patient outcomes. The deep learning models aimed at automating this process face significant challenges due to the scarcity of expert annotations, high computational costs, and the need to capture subtle diagnostic details in highly variable images. This study introduces HAMIL-QA, a multiple instance learning (MIL) framework, designed to overcome these obstacles. HAMIL-QA employs a hierarchical bag and sub-bag structure that allows for targeted analysis within sub-bags and aggregates insights at the volume level. 
This hierarchical MIL approach reduces reliance on extensive annotations, lessens computational load, and ensures clinically relevant quality predictions by focusing on diagnostically critical image features. Our experiments show that HAMIL-QA surpasses existing MIL methods and traditional supervised approaches in accuracy, AUROC, and F1-Score on an LGE MRI scan dataset, demonstrating its potential as a scalable solution for LGE MRI quality assessment automation. The code is available at: $\href{https://github.com/arf111/HAMIL-QA}{\text{this https URL}}$ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07254v1-abstract-full').style.display = 'none'; document.getElementById('2407.07254v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to MICCAI2024, 10 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15657">arXiv:2406.15657</a> <span> [<a href="https://arxiv.org/pdf/2406.15657">pdf</a>, <a href="https://arxiv.org/format/2406.15657">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> FIRST: Faster Improved Listwise Reranking with Single Token Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Doo%2C+J">JaeHyeok Doo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yifei Xu</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Swain%2C+D">Deevya Swain</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+H">Heng Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15657v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have significantly advanced the field of information retrieval, particularly for reranking. Listwise LLM rerankers have showcased superior performance and generalizability compared to existing supervised approaches. 
However, conventional listwise LLM reranking methods lack efficiency as they provide ranking output in the form of a generated ordered sequence of candidat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15657v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15657v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15657v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have significantly advanced the field of information retrieval, particularly for reranking. Listwise LLM rerankers have showcased superior performance and generalizability compared to existing supervised approaches. However, conventional listwise LLM reranking methods lack efficiency as they provide ranking output in the form of a generated ordered sequence of candidate passage identifiers. Further, they are trained with the typical language modeling objective, which treats all ranking errors uniformly--potentially at the cost of misranking highly relevant passages. Addressing these limitations, we introduce FIRST, a novel listwise LLM reranking approach leveraging the output logits of the first generated identifier to directly obtain a ranked ordering of the candidates. Further, we incorporate a learning-to-rank loss during training, prioritizing ranking accuracy for the more relevant passages. Empirical results demonstrate that FIRST accelerates inference by 50% while maintaining a robust ranking performance with gains across the BEIR benchmark. Finally, to illustrate the practical effectiveness of listwise LLM rerankers, we investigate their application in providing relevance feedback for retrievers during inference. Our results show that LLM rerankers can provide a stronger distillation signal compared to cross-encoders, yielding substantial improvements in retriever recall after relevance feedback. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15657v1-abstract-full').style.display = 'none'; document.getElementById('2406.15657v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11706">arXiv:2406.11706</a> <span> [<a href="https://arxiv.org/pdf/2406.11706">pdf</a>, <a href="https://arxiv.org/format/2406.11706">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Prompts as Auto-Optimized Training Hyperparameters: Training Best-in-Class IR Models from Scratch with 10 Gold Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xian%2C+J">Jasper Xian</a>, <a href="/search/cs?searchtype=author&query=Samuel%2C+S">Saron Samuel</a>, <a href="/search/cs?searchtype=author&query=Khoubsirat%2C+F">Faraz Khoubsirat</a>, <a href="/search/cs?searchtype=author&query=Pradeep%2C+R">Ronak Pradeep</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a 
href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Potts%2C+C">Christopher Potts</a>, <a href="/search/cs?searchtype=author&query=Khattab%2C+O">Omar Khattab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11706v1-abstract-short" style="display: inline;"> We develop a method for training small-scale (under 100M parameter) neural information retrieval models with as few as 10 gold relevance labels. The method depends on generating synthetic queries for documents using a language model (LM), and the key step is that we automatically optimize the LM prompt that is used to generate these queries based on training quality. In experiments with the BIRCO… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11706v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11706v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11706v1-abstract-full" style="display: none;"> We develop a method for training small-scale (under 100M parameter) neural information retrieval models with as few as 10 gold relevance labels. The method depends on generating synthetic queries for documents using a language model (LM), and the key step is that we automatically optimize the LM prompt that is used to generate these queries based on training quality. In experiments with the BIRCO benchmark, we find that models trained with our method outperform RankZephyr and are competitive with RankLLama, both of which are 7B parameter models trained on over 100K labels. These findings point to the power of automatic prompt optimization for synthetic dataset generation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11706v1-abstract-full').style.display = 'none'; document.getElementById('2406.11706v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04346">arXiv:2406.04346</a> <span> [<a href="https://arxiv.org/pdf/2406.04346">pdf</a>, <a href="https://arxiv.org/format/2406.04346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3644815.3644981">10.1145/3644815.3644981 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Automating Patch Set Generation from Code Review Comments Using Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rahman%2C+T">Tajmilur Rahman</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+R">Rahul Singh</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+Y">Mir Yousuf Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04346v1-abstract-short" style="display: inline;"> The advent 
of Large Language Models (LLMs) has revolutionized various domains of artificial intelligence, including the realm of software engineering. In this research, we evaluate the efficacy of pre-trained LLMs in replicating the tasks traditionally performed by developers in response to code review comments. We provide code contexts to five popular LLMs and obtain the suggested code-changes (p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04346v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04346v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04346v1-abstract-full" style="display: none;"> The advent of Large Language Models (LLMs) has revolutionized various domains of artificial intelligence, including the realm of software engineering. In this research, we evaluate the efficacy of pre-trained LLMs in replicating the tasks traditionally performed by developers in response to code review comments. We provide code contexts to five popular LLMs and obtain the suggested code-changes (patch sets) derived from real-world code-review comments. The performance of each model is meticulously assessed by comparing their generated patch sets against the historical data of human-generated patch-sets from the same repositories. This comparative analysis aims to determine the accuracy, relevance, and depth of the LLMs' feedback, thereby evaluating their readiness to support developers in responding to code-review comments. Novelty: This particular research area is still immature requiring a substantial amount of studies yet to be done. No prior research has compared the performance of existing Large Language Models (LLMs) in code-review comments. 
This in-progress study assesses current LLMs in code review and paves the way for future advancements in automated code quality assurance, reducing context-switching overhead due to interruptions from code change requests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04346v1-abstract-full').style.display = 'none'; document.getElementById('2406.04346v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00827">arXiv:2403.00827</a> <span> [<a href="https://arxiv.org/pdf/2403.00827">pdf</a>, <a href="https://arxiv.org/format/2403.00827">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Self-Refinement of Language Models from External Proxy Metrics Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ramji%2C+K">Keshav Ramji</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Young-Suk Lee</a>, <a href="/search/cs?searchtype=author&query=Astudillo%2C+R+F">Ramón Fernandez Astudillo</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat 
Sultan</a>, <a href="/search/cs?searchtype=author&query=Naseem%2C+T">Tahira Naseem</a>, <a href="/search/cs?searchtype=author&query=Munawar%2C+A">Asim Munawar</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00827v1-abstract-short" style="display: inline;"> It is often desirable for Large Language Models (LLMs) to capture multiple objectives when providing a response. In document-grounded response generation, for example, agent responses are expected to be relevant to a user's query while also being grounded in a given document. In this paper, we introduce Proxy Metric-based Self-Refinement (ProMiSe), which enables an LLM to refine its own initial re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00827v1-abstract-full').style.display = 'inline'; document.getElementById('2403.00827v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00827v1-abstract-full" style="display: none;"> It is often desirable for Large Language Models (LLMs) to capture multiple objectives when providing a response. In document-grounded response generation, for example, agent responses are expected to be relevant to a user's query while also being grounded in a given document. In this paper, we introduce Proxy Metric-based Self-Refinement (ProMiSe), which enables an LLM to refine its own initial response along key dimensions of quality guided by external metrics feedback, yielding an overall better final response. ProMiSe leverages feedback on response quality through principle-specific proxy metrics, and iteratively refines its response one principle at a time. 
We apply ProMiSe to open source language models Flan-T5-XXL and Llama-2-13B-Chat, to evaluate its performance on document-grounded question answering datasets, MultiDoc2Dial and QuAC, demonstrating that self-refinement improves response quality. We further show that fine-tuning Llama-2-13B-Chat on the synthetic dialogue data generated by ProMiSe yields significant performance improvements over the zero-shot baseline as well as a supervised fine-tuned model on human annotated data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00827v1-abstract-full').style.display = 'none'; document.getElementById('2403.00827v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11770">arXiv:2402.11770</a> <span> [<a href="https://arxiv.org/pdf/2402.11770">pdf</a>, <a href="https://arxiv.org/format/2402.11770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Structured Chain-of-Thought Prompting for Few-Shot Generation of Content-Grounded QA Conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Ganhotra%2C+J">Jatin Ganhotra</a>, <a href="/search/cs?searchtype=author&query=Astudillo%2C+R+F">Ramón Fernandez Astudillo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2402.11770v2-abstract-short" style="display: inline;"> We introduce a structured chain-of-thought (SCoT) prompting approach to generating content-grounded multi-turn question-answer conversations using a pre-trained large language model (LLM). At the core of our proposal is a structured breakdown of the complex task into a number of states in a state machine, so that actions corresponding to various subtasks, e.g., content reading and utterance genera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11770v2-abstract-full').style.display = 'inline'; document.getElementById('2402.11770v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11770v2-abstract-full" style="display: none;"> We introduce a structured chain-of-thought (SCoT) prompting approach to generating content-grounded multi-turn question-answer conversations using a pre-trained large language model (LLM). At the core of our proposal is a structured breakdown of the complex task into a number of states in a state machine, so that actions corresponding to various subtasks, e.g., content reading and utterance generation, can be executed in their own dedicated states. Each state leverages a unique set of resources including prompts and (optionally) additional tools to augment the generation process. Our experimental results show that SCoT prompting with designated states for hallucination mitigation increases agent faithfulness to grounding documents by up to 16.8%. When used as training data, our open-domain conversations synthesized from only 6 Wikipedia-based seed demonstrations train strong conversational QA agents; in out-of-domain evaluation, for example, we observe improvements of up to 13.9% over target domain gold data when the latter is augmented with our generated examples. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11770v2-abstract-full').style.display = 'none'; document.getElementById('2402.11770v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.06356">arXiv:2401.06356</a> <span> [<a href="https://arxiv.org/pdf/2401.06356">pdf</a>, <a href="https://arxiv.org/format/2401.06356">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Empirical Investigation into the Effect of Parameter Choices in Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Trivedi%2C+A">Aashka Trivedi</a>, <a href="/search/cs?searchtype=author&query=Awasthy%2C+P">Parul Awasthy</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.06356v2-abstract-short" style="display: inline;"> We present a large-scale empirical study of how choices of configuration parameters affect performance in knowledge distillation (KD). 
An example of such a KD parameter is the measure of distance between the predictions of the teacher and the student, common choices for which include the mean squared error (MSE) and the KL-divergence. Although scattered efforts have been made to understand the dif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06356v2-abstract-full').style.display = 'inline'; document.getElementById('2401.06356v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.06356v2-abstract-full" style="display: none;"> We present a large-scale empirical study of how choices of configuration parameters affect performance in knowledge distillation (KD). An example of such a KD parameter is the measure of distance between the predictions of the teacher and the student, common choices for which include the mean squared error (MSE) and the KL-divergence. Although scattered efforts have been made to understand the differences between such options, the KD literature still lacks a systematic study on their general effect on student performance. We take an empirical approach to this question in this paper, seeking to find out the extent to which such choices influence student performance across 13 datasets from 4 NLP tasks and 3 student sizes. We quantify the cost of making sub-optimal choices and identify a single configuration that performs well across the board. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06356v2-abstract-full').style.display = 'none'; document.getElementById('2401.06356v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.08640">arXiv:2311.08640</a> <span> [<a href="https://arxiv.org/pdf/2311.08640">pdf</a>, <a href="https://arxiv.org/format/2311.08640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multistage Collaborative Knowledge Distillation from a Large Language Model for Semi-Supervised Sequence Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jiachen Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wenlong Zhao</a>, <a href="/search/cs?searchtype=author&query=Drozdov%2C+A">Andrew Drozdov</a>, <a href="/search/cs?searchtype=author&query=Rozonoyer%2C+B">Benjamin Rozonoyer</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jay-Yoon Lee</a>, <a href="/search/cs?searchtype=author&query=Iyyer%2C+M">Mohit Iyyer</a>, <a href="/search/cs?searchtype=author&query=McCallum%2C+A">Andrew McCallum</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.08640v4-abstract-short" style="display: inline;"> We study semi-supervised sequence generation tasks, where the few labeled examples are too scarce to finetune a model, and meanwhile, few-shot prompted large language models (LLMs) exhibit room for improvement. In this paper, we present the discovery that a student model distilled from a few-shot prompted LLM can commonly generalize better than its teacher to unseen examples on such tasks. We find… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08640v4-abstract-full').style.display = 'inline'; document.getElementById('2311.08640v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.08640v4-abstract-full" style="display: none;"> We study semi-supervised sequence generation tasks, where the few labeled examples are too scarce to finetune a model, and meanwhile, few-shot prompted large language models (LLMs) exhibit room for improvement. In this paper, we present the discovery that a student model distilled from a few-shot prompted LLM can commonly generalize better than its teacher to unseen examples on such tasks. We find that the student is able to learn a general pattern from the high-quality pseudolabels produced by the teacher during knowledge distillation (KD), and favorably not a general pattern from the low-quality pseudolables. Leveraging this discovery, we propose a new method, Multistage Collaborative Knowledge Distillation from an LLM (MCKD), for these tasks. MCKD first few-shot prompts an LLM to produce pseudolabels for unlabeled data. Then at each stage of an iterative KD process, a new pair of students is trained on disjoint partitions of the pseudolabeled data, and produces new and improved pseudolabels for their unseen partitions. 
We conduct extensive experiments on four syntactic and semantic parsing datasets and show the effectiveness of MCKD for low-resource semi-supervised sequence generation. On CRAFT biomedical parsing, for example, 3-stage MCKD with 50 labeled examples outperforms an LLM teacher and vanilla KD by 7.5% and 3.7% parsing F1, respectively, and matches the performance of supervised finetuning with 500 labeled examples. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08640v4-abstract-full').style.display = 'none'; document.getElementById('2311.08640v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13961">arXiv:2310.13961</a> <span> [<a href="https://arxiv.org/pdf/2310.13961">pdf</a>, <a href="https://arxiv.org/format/2310.13961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Ensemble-Instruct: Generating Instruction-Tuning Data with a Heterogeneous Mixture of LMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Young-Suk Lee</a>, <a 
href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=El-Kurdi%2C+Y">Yousef El-Kurdi</a>, <a href="/search/cs?searchtype=author&query=Munawar%2C+T+N+A">Tahira Naseem Asim Munawar</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a>, <a href="/search/cs?searchtype=author&query=Astudillo%2C+R+F">Ramón Fernandez Astudillo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13961v1-abstract-short" style="display: inline;"> Using in-context learning (ICL) for data generation, techniques such as Self-Instruct (Wang et al., 2023) or the follow-up Alpaca (Taori et al., 2023) can train strong conversational agents with only a small amount of human supervision. One limitation of these approaches is that they resort to very large language models (around 175B parameters) that are also proprietary and non-public. Here we exp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13961v1-abstract-full').style.display = 'inline'; document.getElementById('2310.13961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13961v1-abstract-full" style="display: none;"> Using in-context learning (ICL) for data generation, techniques such as Self-Instruct (Wang et al., 2023) or the follow-up Alpaca (Taori et al., 2023) can train strong conversational agents with only a small amount of human supervision. One limitation of these approaches is that they resort to very large language models (around 175B parameters) that are also proprietary and non-public. 
Here we explore the application of such techniques to language models that are much smaller (around 10B--40B parameters) and have permissive licenses. We find the Self-Instruct approach to be less effective at these sizes and propose new ICL methods that draw on two main ideas: (a) Categorization and simplification of the ICL templates to make prompt learning easier for the LM, and (b) Ensembling over multiple LM outputs to help select high-quality synthetic examples. Our algorithm leverages the 175 Self-Instruct seed tasks and employs separate pipelines for instructions that require an input and instructions that do not. Empirical investigations with different LMs show that: (1) Our proposed method yields higher-quality instruction tuning data than Self-Instruct, (2) It improves performances of both vanilla and instruction-tuned LMs by significant margins, and (3) Smaller instruction-tuned LMs generate more useful outputs than their larger un-tuned counterparts. Our codebase is available at https://github.com/IBM/ensemble-instruct. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13961v1-abstract-full').style.display = 'none'; document.getElementById('2310.13961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EMNLP 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.08805">arXiv:2310.08805</a> <span> [<a href="https://arxiv.org/pdf/2310.08805">pdf</a>, <a href="https://arxiv.org/format/2310.08805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-52448-6_22">10.1007/978-3-031-52448-6_22 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Two-Stage Deep Learning Framework for Quality Assessment of Left Atrial Late Gadolinium Enhanced MRI Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+K+M+A">K M Arefeen Sultan</a>, <a href="/search/cs?searchtype=author&query=Orkild%2C+B">Benjamin Orkild</a>, <a href="/search/cs?searchtype=author&query=Morris%2C+A">Alan Morris</a>, <a href="/search/cs?searchtype=author&query=Kholmovski%2C+E">Eugene Kholmovski</a>, <a href="/search/cs?searchtype=author&query=Bieging%2C+E">Erik Bieging</a>, <a href="/search/cs?searchtype=author&query=Kwan%2C+E">Eugene Kwan</a>, <a href="/search/cs?searchtype=author&query=Ranjan%2C+R">Ravi Ranjan</a>, <a href="/search/cs?searchtype=author&query=DiBella%2C+E">Ed DiBella</a>, <a href="/search/cs?searchtype=author&query=Elhabian%2C+S">Shireen Elhabian</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.08805v1-abstract-short" style="display: inline;"> Accurate assessment of left atrial fibrosis in patients with atrial fibrillation relies on high-quality 3D late gadolinium enhancement (LGE) MRI images. However, obtaining such images is challenging due to patient motion, changing breathing patterns, or sub-optimal choice of pulse sequence parameters. Automated assessment of LGE-MRI image diagnostic quality is clinically significant as it would en… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08805v1-abstract-full').style.display = 'inline'; document.getElementById('2310.08805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.08805v1-abstract-full" style="display: none;"> Accurate assessment of left atrial fibrosis in patients with atrial fibrillation relies on high-quality 3D late gadolinium enhancement (LGE) MRI images. However, obtaining such images is challenging due to patient motion, changing breathing patterns, or sub-optimal choice of pulse sequence parameters. Automated assessment of LGE-MRI image diagnostic quality is clinically significant as it would enhance diagnostic accuracy, improve efficiency, ensure standardization, and contributes to better patient outcomes by providing reliable and high-quality LGE-MRI scans for fibrosis quantification and treatment planning. To address this, we propose a two-stage deep-learning approach for automated LGE-MRI image diagnostic quality assessment. The method includes a left atrium detector to focus on relevant regions and a deep network to evaluate diagnostic quality. We explore two training strategies, multi-task learning, and pretraining using contrastive learning, to overcome limited annotated data in medical imaging. 
Contrastive Learning result shows about $4\%$, and $9\%$ improvement in F1-Score and Specificity compared to Multi-Task learning when there's limited data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08805v1-abstract-full').style.display = 'none'; document.getElementById('2310.08805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to STACOM 2023. 11 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.16275">arXiv:2307.16275</a> <span> [<a href="https://arxiv.org/pdf/2307.16275">pdf</a>, <a href="https://arxiv.org/format/2307.16275">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Stylized Projected GAN: A Novel Architecture for Fast and Realistic Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Muttakin%2C+M+N">Md Nurul Muttakin</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+S">Malik Shahid Sultan</a>, <a href="/search/cs?searchtype=author&query=Hoehndorf%2C+R">Robert Hoehndorf</a>, <a href="/search/cs?searchtype=author&query=Ombao%2C+H">Hernando Ombao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.16275v1-abstract-short" 
style="display: inline;"> Generative Adversarial Networks are used for generating the data using a generator and a discriminator, GANs usually produce high-quality images, but training GANs in an adversarial setting is a difficult task. GANs require high computation power and hyper-parameter regularization for converging. Projected GANs tackle the training difficulty of GANs by using transfer learning to project the genera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.16275v1-abstract-full').style.display = 'inline'; document.getElementById('2307.16275v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.16275v1-abstract-full" style="display: none;"> Generative Adversarial Networks are used for generating the data using a generator and a discriminator, GANs usually produce high-quality images, but training GANs in an adversarial setting is a difficult task. GANs require high computation power and hyper-parameter regularization for converging. Projected GANs tackle the training difficulty of GANs by using transfer learning to project the generated and real samples into a pre-trained feature space. Projected GANs improve the training time and convergence but produce artifacts in the generated images which reduce the quality of the generated samples, we propose an optimized architecture called Stylized Projected GANs which integrates the mapping network of the Style GANs with Skip Layer Excitation of Fast GAN. The integrated modules are incorporated within the generator architecture of the Fast GAN to mitigate the problem of artifacts in the generated images. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.16275v1-abstract-full').style.display = 'none'; document.getElementById('2307.16275v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We present a new architecture for generating realistic images by combining mapping network of Style GANs and Projected GANs</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11744">arXiv:2305.11744</a> <span> [<a href="https://arxiv.org/pdf/2305.11744">pdf</a>, <a href="https://arxiv.org/format/2305.11744">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ReFIT: Relevance Feedback from a Reranker during Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Dasigi%2C+P">Pradeep Dasigi</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Cohan%2C+A">Arman Cohan</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+H">Heng Ji</a>, <a href="/search/cs?searchtype=author&query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11744v2-abstract-short" style="display: inline;"> Retrieve-and-rerank is a prevalent framework in neural information retrieval, wherein a bi-encoder network initially retrieves a pre-defined number of candidates (e.g., K=100), which are then reranked by a more powerful cross-encoder model. While the reranker often yields improved candidate scores compared to the retriever, its scope is confined to only the top K retrieved candidates. As a result,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11744v2-abstract-full').style.display = 'inline'; document.getElementById('2305.11744v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11744v2-abstract-full" style="display: none;"> Retrieve-and-rerank is a prevalent framework in neural information retrieval, wherein a bi-encoder network initially retrieves a pre-defined number of candidates (e.g., K=100), which are then reranked by a more powerful cross-encoder model. While the reranker often yields improved candidate scores compared to the retriever, its scope is confined to only the top K retrieved candidates. As a result, the reranker cannot improve retrieval performance in terms of Recall@K. In this work, we propose to leverage the reranker to improve recall by making it provide relevance feedback to the retriever at inference time. Specifically, given a test instance during inference, we distill the reranker's predictions for that instance into the retriever's query representation using a lightweight update mechanism. The aim of the distillation loss is to align the retriever's candidate scores more closely with those produced by the reranker. The algorithm then proceeds by executing a second retrieval step using the updated query vector. 
We empirically demonstrate that this method, applicable to various retrieve-and-rerank frameworks, substantially enhances retrieval recall across multiple domains, languages, and modalities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11744v2-abstract-full').style.display = 'none'; document.getElementById('2305.11744v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08248">arXiv:2303.08248</a> <span> [<a href="https://arxiv.org/pdf/2303.08248">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5121/ijcnc.2023.15101">10.5121/ijcnc.2023.15101 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An Intrusion Detection Mechanism for MANETs Based on Deep Learning Artificial Neural Networks (ANNs) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+T">Mohamad T Sultan</a>, <a href="/search/cs?searchtype=author&query=Sayed%2C+H+E">Hesham El 
Sayed</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+M+A">Manzoor Ahmed Khan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08248v1-abstract-short" style="display: inline;"> Mobile Ad-hoc Network (MANET) is a distributed, decentralized network of wireless portable nodes connecting directly without any fixed communication base station or centralized administration. Nodes in MANET move continuously in random directions and follow an arbitrary manner, which presents numerous challenges to these networks and make them more susceptible to different security threats. Due to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08248v1-abstract-full').style.display = 'inline'; document.getElementById('2303.08248v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08248v1-abstract-full" style="display: none;"> Mobile Ad-hoc Network (MANET) is a distributed, decentralized network of wireless portable nodes connecting directly without any fixed communication base station or centralized administration. Nodes in MANET move continuously in random directions and follow an arbitrary manner, which presents numerous challenges to these networks and make them more susceptible to different security threats. Due to this decentralized nature of their overall architecture, combined with the limitation of hardware resources, those infrastructure-less networks are more susceptible to different security attacks such as black hole attack, network partition, node selfishness, and Denial of Service (DoS) attacks. This work aims to present, investigate, and design an intrusion detection predictive technique for Mobile Ad hoc networks using deep learning artificial neural networks (ANNs). 
A simulation-based evaluation and a deep ANNs modelling for detecting and isolating a Denial of Service (DoS) attack are presented to improve the overall security level of Mobile ad hoc networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08248v1-abstract-full').style.display = 'none'; document.getElementById('2303.08248v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.00807">arXiv:2303.00807</a> <span> [<a href="https://arxiv.org/pdf/2303.00807">pdf</a>, <a href="https://arxiv.org/format/2303.00807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> UDAPDR: Unsupervised Domain Adaptation via LLM Prompting and Distillation of Rerankers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saad-Falcon%2C+J">Jon Saad-Falcon</a>, <a href="/search/cs?searchtype=author&query=Khattab%2C+O">Omar Khattab</a>, <a href="/search/cs?searchtype=author&query=Santhanam%2C+K">Keshav Santhanam</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Franz%2C+M">Martin Franz</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md 
Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Potts%2C+C">Christopher Potts</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.00807v3-abstract-short" style="display: inline;"> Many information retrieval tasks require large labeled datasets for fine-tuning. However, such datasets are often unavailable, and their utility for real-world applications can diminish quickly due to domain shifts. To address this challenge, we develop and motivate a method for using large language models (LLMs) to generate large numbers of synthetic queries cheaply. The method begins by generati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.00807v3-abstract-full').style.display = 'inline'; document.getElementById('2303.00807v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.00807v3-abstract-full" style="display: none;"> Many information retrieval tasks require large labeled datasets for fine-tuning. However, such datasets are often unavailable, and their utility for real-world applications can diminish quickly due to domain shifts. To address this challenge, we develop and motivate a method for using large language models (LLMs) to generate large numbers of synthetic queries cheaply. The method begins by generating a small number of synthetic queries using an expensive LLM. After that, a much less expensive one is used to create large numbers of synthetic queries, which are used to fine-tune a family of reranker models. These rerankers are then distilled into a single efficient retriever for use in the target domain. We show that this technique boosts zero-shot accuracy in long-tail domains and achieves substantially lower latency than standard reranking methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.00807v3-abstract-full').style.display = 'none'; document.getElementById('2303.00807v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Long Paper at Empirical Methods in Natural Language Processing (EMNLP) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.12609">arXiv:2301.12609</a> <span> [<a href="https://arxiv.org/pdf/2301.12609">pdf</a>, <a href="https://arxiv.org/format/2301.12609">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Distillation $\approx$ Label Smoothing: Fact or Fallacy? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.12609v4-abstract-short" style="display: inline;"> Originally proposed as a method for knowledge transfer from one model to another, some recent studies have suggested that knowledge distillation (KD) is in fact a form of regularization. 
Perhaps the strongest argument of all for this new perspective comes from its apparent similarities with label smoothing (LS). Here we re-examine this stated equivalence between the two methods by comparing the pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12609v4-abstract-full').style.display = 'inline'; document.getElementById('2301.12609v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.12609v4-abstract-full" style="display: none;"> Originally proposed as a method for knowledge transfer from one model to another, some recent studies have suggested that knowledge distillation (KD) is in fact a form of regularization. Perhaps the strongest argument of all for this new perspective comes from its apparent similarities with label smoothing (LS). Here we re-examine this stated equivalence between the two methods by comparing the predictive confidences of the models they train. Experiments on four text classification tasks involving models of different sizes show that: (a) In most settings, KD and LS drive model confidence in completely opposite directions, and (b) In KD, the student inherits not only its knowledge but also its confidence from the teacher, reinforcing the classical knowledge transfer view. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12609v4-abstract-full').style.display = 'none'; document.getElementById('2301.12609v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.09715">arXiv:2301.09715</a> <span> [<a href="https://arxiv.org/pdf/2301.09715">pdf</a>, <a href="https://arxiv.org/format/2301.09715">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PrimeQA: The Prime Repository for State-of-the-Art Multilingual Question Answering Research and Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Sen%2C+J">Jaydeep Sen</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+B">Bhavani Iyer</a>, <a href="/search/cs?searchtype=author&query=Franz%2C+M">Martin Franz</a>, <a href="/search/cs?searchtype=author&query=Fadnis%2C+K">Kshitij Fadnis</a>, <a href="/search/cs?searchtype=author&query=Bornea%2C+M">Mihaela Bornea</a>, <a href="/search/cs?searchtype=author&query=Rosenthal%2C+S">Sara Rosenthal</a>, <a href="/search/cs?searchtype=author&query=McCarley%2C+S">Scott McCarley</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rong Zhang</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+V">Vishwajeet Kumar</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yulong Li</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a 
href="/search/cs?searchtype=author&query=Bhat%2C+R">Riyaz Bhat</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.09715v2-abstract-short" style="display: inline;"> The field of Question Answering (QA) has made remarkable progress in recent years, thanks to the advent of large pre-trained language models, newer realistic benchmark datasets with leaderboards, and novel algorithms for key components such as retrievers and readers. In this paper, we introduce PRIMEQA: a one-stop and open-source QA repository with an aim to democratize QA research and facilitate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09715v2-abstract-full').style.display = 'inline'; document.getElementById('2301.09715v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.09715v2-abstract-full" style="display: none;"> The field of Question Answering (QA) has made remarkable progress in recent years, thanks to the advent of large pre-trained language models, newer realistic benchmark datasets with leaderboards, and novel algorithms for key components such as retrievers and readers. In this paper, we introduce PRIMEQA: a one-stop and open-source QA repository with an aim to democratize QA research and facilitate easy replication of state-of-the-art (SOTA) QA methods. PRIMEQA supports core QA functionalities like retrieval and reading comprehension as well as auxiliary capabilities such as question generation. It has been designed as an end-to-end toolkit for various use cases: building front-end applications, replicating SOTA methods on public benchmarks, and expanding pre-existing methods. 
PRIMEQA is available at : https://github.com/primeqa. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09715v2-abstract-full').style.display = 'none'; document.getElementById('2301.09715v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.01340">arXiv:2212.01340</a> <span> [<a href="https://arxiv.org/pdf/2212.01340">pdf</a>, <a href="https://arxiv.org/format/2212.01340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Moving Beyond Downstream Task Accuracy for Information Retrieval Benchmarking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Santhanam%2C+K">Keshav Santhanam</a>, <a href="/search/cs?searchtype=author&query=Saad-Falcon%2C+J">Jon Saad-Falcon</a>, <a href="/search/cs?searchtype=author&query=Franz%2C+M">Martin Franz</a>, <a href="/search/cs?searchtype=author&query=Khattab%2C+O">Omar Khattab</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a>, <a 
href="/search/cs?searchtype=author&query=Zaharia%2C+M">Matei Zaharia</a>, <a href="/search/cs?searchtype=author&query=Potts%2C+C">Christopher Potts</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.01340v1-abstract-short" style="display: inline;"> Neural information retrieval (IR) systems have progressed rapidly in recent years, in large part due to the release of publicly available benchmarking tasks. Unfortunately, some dimensions of this progress are illusory: the majority of the popular IR benchmarks today focus exclusively on downstream task accuracy and thus conceal the costs incurred by systems that trade away efficiency for quality.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01340v1-abstract-full').style.display = 'inline'; document.getElementById('2212.01340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.01340v1-abstract-full" style="display: none;"> Neural information retrieval (IR) systems have progressed rapidly in recent years, in large part due to the release of publicly available benchmarking tasks. Unfortunately, some dimensions of this progress are illusory: the majority of the popular IR benchmarks today focus exclusively on downstream task accuracy and thus conceal the costs incurred by systems that trade away efficiency for quality. Latency, hardware cost, and other efficiency considerations are paramount to the deployment of IR systems in user-facing settings. We propose that IR benchmarks structure their evaluation methodology to include not only metrics of accuracy, but also efficiency considerations such as a query latency and the corresponding cost budget for a reproducible hardware setting. 
For the popular IR benchmarks MS MARCO and XOR-TyDi, we show how the best choice of IR system varies according to how these efficiency considerations are chosen and weighed. We hope that future benchmarks will adopt these guidelines toward more holistic IR evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01340v1-abstract-full').style.display = 'none'; document.getElementById('2212.01340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.00136">arXiv:2212.00136</a> <span> [<a href="https://arxiv.org/pdf/2212.00136">pdf</a>, <a href="https://arxiv.org/format/2212.00136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DEL-Dock: Molecular Docking-Enabled Modeling of DNA-Encoded Libraries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shmilovich%2C+K">Kirill Shmilovich</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Benson Chen</a>, <a href="/search/cs?searchtype=author&query=Karaletsos%2C+T">Theofanis Karaletsos</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+M">Mohammad M. 
Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.00136v2-abstract-short" style="display: inline;"> DNA-Encoded Library (DEL) technology has enabled significant advances in hit identification by enabling efficient testing of combinatorially-generated molecular libraries. DEL screens measure protein binding affinity through sequencing reads of molecules tagged with unique DNA-barcodes that survive a series of selection experiments. Computational models have been deployed to learn the latent bindin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.00136v2-abstract-full').style.display = 'inline'; document.getElementById('2212.00136v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.00136v2-abstract-full" style="display: none;"> DNA-Encoded Library (DEL) technology has enabled significant advances in hit identification by enabling efficient testing of combinatorially-generated molecular libraries. DEL screens measure protein binding affinity through sequencing reads of molecules tagged with unique DNA-barcodes that survive a series of selection experiments. Computational models have been deployed to learn the latent binding affinities that are correlated to the sequenced count data; however, this correlation is often obfuscated by various sources of noise introduced in its complicated data-generation process. In order to denoise DEL count data and screen for molecules with good binding affinity, computational models require the correct assumptions in their modeling structure to capture the correct signals underlying the data. Recent advances in DEL models have focused on probabilistic formulations of count data, but existing approaches have thus far been limited to only utilizing 2-D molecule-level representations. 
We introduce a new paradigm, DEL-Dock, that combines ligand-based descriptors with 3-D spatial information from docked protein-ligand complexes. 3-D spatial information allows our model to learn over the actual binding modality rather than using only structure-based information of the ligand. We show that our model is capable of effectively denoising DEL count data to predict molecule enrichment scores that are better correlated with experimental binding affinity measurements compared to prior works. Moreover, by learning over a collection of docked poses we demonstrate that our model, trained only on DEL data, implicitly learns to perform good docking pose selection without requiring external supervision from expensive-to-source protein crystal structures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.00136v2-abstract-full').style.display = 'none'; document.getElementById('2212.00136v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.16634">arXiv:2211.16634</a> <span> [<a href="https://arxiv.org/pdf/2211.16634">pdf</a>, <a href="https://arxiv.org/format/2211.16634">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SPARTAN: Sparse Hierarchical Memory for Parameter-Efficient Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Ferritto%2C+A">Anthony Ferritto</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.16634v1-abstract-short" style="display: inline;"> Fine-tuning pre-trained language models (PLMs) achieves impressive performance on a range of downstream tasks, and their sizes have consequently been getting bigger. Since a different copy of the model is required for each task, this paradigm is infeasible for storage-constrained edge devices like mobile phones. 
In this paper, we propose SPARTAN, a parameter efficient (PE) and computationally fast… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.16634v1-abstract-full').style.display = 'inline'; document.getElementById('2211.16634v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.16634v1-abstract-full" style="display: none;"> Fine-tuning pre-trained language models (PLMs) achieves impressive performance on a range of downstream tasks, and their sizes have consequently been getting bigger. Since a different copy of the model is required for each task, this paradigm is infeasible for storage-constrained edge devices like mobile phones. In this paper, we propose SPARTAN, a parameter efficient (PE) and computationally fast architecture for edge devices that adds hierarchically organized sparse memory after each Transformer layer. SPARTAN freezes the PLM parameters and fine-tunes only its memory, thus significantly reducing storage costs by re-using the PLM backbone for different tasks. SPARTAN contains two levels of memory, with only a sparse subset of parents being chosen in the first level for each input, and children cells corresponding to those parents being used to compute an output representation. This sparsity combined with other architecture optimizations improves SPARTAN's throughput by over 90% during inference on a Raspberry Pi 4 when compared to PE baselines (adapters) while also outperforming the latter by 0.1 points on the GLUE benchmark. Further, it can be trained 34% faster in a few-shot setting, while performing within 0.9 points of adapters. Qualitative analysis shows that different parent cells in SPARTAN specialize in different topics, thus dividing responsibility efficiently. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.16634v1-abstract-full').style.display = 'none'; document.getElementById('2211.16634v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.01792">arXiv:2210.01792</a> <span> [<a href="https://arxiv.org/pdf/2210.01792">pdf</a>, <a href="https://arxiv.org/format/2210.01792">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Sampling Streaming Data with Parallel Vector Quantization -- PVQ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mujahid Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.01792v1-abstract-short" style="display: inline;"> Accumulation of corporate data in the cloud has attracted more enterprise applications to the cloud creating data gravity. As a consequence, network traffic has become more cloud centric. This increase in cloud centric traffic poses new challenges in designing learning systems for streaming data due to class imbalance. 
The number of classes plays a vital role in the accuracy of the classifiers bui… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01792v1-abstract-full').style.display = 'inline'; document.getElementById('2210.01792v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.01792v1-abstract-full" style="display: none;"> Accumulation of corporate data in the cloud has attracted more enterprise applications to the cloud creating data gravity. As a consequence, network traffic has become more cloud centric. This increase in cloud centric traffic poses new challenges in designing learning systems for streaming data due to class imbalance. The number of classes plays a vital role in the accuracy of the classifiers built from the data streams. In this paper, we present a vector quantization-based sampling method, which substantially reduces the class imbalance in data streams. We demonstrate its effectiveness by conducting experiments on network traffic and anomaly dataset with commonly used ML model building methods; Multilayered Perceptron on TensorFlow backend, Support Vector Machines, K-Nearest Neighbour, and Random Forests. We built models using parallel processing, batch processing, and randomly selecting samples. We show that the accuracy of classification models improves when the data streams are pre-processed with our method. We used out of the box hyper-parameters of these classifiers and auto sklearn for hyperparameter optimization. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01792v1-abstract-full').style.display = 'none'; document.getElementById('2210.01792v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> F.2.2; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.03703">arXiv:2208.03703</a> <span> [<a href="https://arxiv.org/pdf/2208.03703">pdf</a>, <a href="https://arxiv.org/format/2208.03703">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Granger Causality using Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+S">Malik Shahid Sultan</a>, <a href="/search/cs?searchtype=author&query=Horvath%2C+S">Samuel Horvath</a>, <a href="/search/cs?searchtype=author&query=Ombao%2C+H">Hernando Ombao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.03703v2-abstract-short" style="display: inline;"> Dependence between nodes in a network is an important concept that pervades many areas including 
finance, politics, sociology, genomics and the brain sciences. One way to characterize dependence between components of a multivariate time series data is via Granger Causality (GC). Standard traditional approaches to GC estimation / inference commonly assume linear dynamics, however such simplificatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.03703v2-abstract-full').style.display = 'inline'; document.getElementById('2208.03703v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.03703v2-abstract-full" style="display: none;"> Dependence between nodes in a network is an important concept that pervades many areas including finance, politics, sociology, genomics and the brain sciences. One way to characterize dependence between components of a multivariate time series data is via Granger Causality (GC). Standard traditional approaches to GC estimation / inference commonly assume linear dynamics, however such simplification does not hold in many real-world applications where signals are inherently non-linear. In such cases, imposing linear models such as vector autoregressive (VAR) models can lead to mis-characterization of true Granger Causal interactions. To overcome this limitation, Tank et al (IEEE Transactions on Pattern Analysis and Machine Learning, 2022) proposed a solution that uses neural networks with sparse regularization penalties. The regularization encourages learnable weights to be sparse, which enables inference on GC. This paper overcomes the limitations of current methods by leveraging advances in machine learning and deep learning which have been demonstrated to learn hidden patterns in the data. We propose novel classes of models that can handle underlying non-linearity in a computationally efficient manner, simultaneously providing GC and lag order selection. 
Firstly, we present the Learned Kernel VAR (LeKVAR) model that learns kernel parameterized by a shared neural net followed by penalization on learnable weights to discover GC structure. Secondly, we show one can directly decouple lags and individual time series importance via decoupled penalties. This is important as we want to select the lag order during the process of GC estimation. This decoupling acts as a filtering and can be extended to any DL model including Multi-Layer Perceptrons (MLP), Recurrent Neural Networks (RNN), Long Short Term Memory Networks (LSTM), Transformers etc, for simultaneous GC estimation and lag selection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.03703v2-abstract-full').style.display = 'none'; document.getElementById('2208.03703v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be Submitted to a Journal work Presented at JSM. 
arXiv admin note: text overlap with arXiv:1802.05842 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.12615">arXiv:2206.12615</a> <span> [<a href="https://arxiv.org/pdf/2206.12615">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Effects of MAC Parameters on the Performance of IEEE 802.11 DCF in NS-3 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Siddik%2C+M+A">Md. Abubakar Siddik</a>, <a href="/search/cs?searchtype=author&query=Nitu%2C+J+A">Jakia Akter Nitu</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+N">Natasha Islam</a>, <a href="/search/cs?searchtype=author&query=Hasi%2C+M+A+A">Most. Anju Ara Hasi</a>, <a href="/search/cs?searchtype=author&query=Ferdous%2C+J">Jannatun Ferdous</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+M+M">Md. Mizanur Rahman</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+N">Md. Nahid Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.12615v1-abstract-short" style="display: inline;"> This paper presents the design procedure of the NS-3 script for WLAN that is organized according to the hierarchical manner of TCP/IP model. We configure all layers by using NS-3 model objects and set and modify the values used by objects to investigate the effects of MAC parameters (access mechanism, CWmin, CWmax and retry limit) on the performance metrics viz. 
packet delivery ratio, packet lost… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.12615v1-abstract-full').style.display = 'inline'; document.getElementById('2206.12615v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.12615v1-abstract-full" style="display: none;"> This paper presents the design procedure of the NS-3 script for WLAN that is organized according to the hierarchical manner of TCP/IP model. We configure all layers by using NS-3 model objects and set and modify the values used by objects to investigate the effects of MAC parameters (access mechanism, CWmin, CWmax and retry limit) on the performance metrics viz. packet delivery ratio, packet lost ratio, aggregated throughput, and average delay. The simulation results show that RTS/CTS access mechanism outperforms basic access mechanism in saturated state, whereas the MAC parameters have no significant impact on network performance in non-saturated state. A higher value of CWmin improves the aggregated throughput in expense of average delay. The tradeoff relationships among the performance metrics are also observed in results for the optimal values of MAC parameters. Our design procedure represents a good guideline for new NS-3 users to design and modify script and results greatly benefit the network design and management. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.12615v1-abstract-full').style.display = 'none'; document.getElementById('2206.12615v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.08441">arXiv:2206.08441</a> <span> [<a href="https://arxiv.org/pdf/2206.08441">pdf</a>, <a href="https://arxiv.org/format/2206.08441">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> GAAMA 2.0: An Integrated System that Answers Boolean and Extractive Questions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=McCarley%2C+S">Scott McCarley</a>, <a href="/search/cs?searchtype=author&query=Bornea%2C+M">Mihaela Bornea</a>, <a href="/search/cs?searchtype=author&query=Rosenthal%2C+S">Sara Rosenthal</a>, <a href="/search/cs?searchtype=author&query=Ferritto%2C+A">Anthony Ferritto</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.08441v2-abstract-short" style="display: inline;"> Recent machine reading comprehension datasets include extractive and boolean questions but current approaches do not offer integrated support for answering both question types. 
We present a multilingual machine reading comprehension system and front-end demo that handles boolean questions by providing both a YES/NO answer and highlighting supporting evidence, and handles extractive questions by hi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.08441v2-abstract-full').style.display = 'inline'; document.getElementById('2206.08441v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.08441v2-abstract-full" style="display: none;"> Recent machine reading comprehension datasets include extractive and boolean questions but current approaches do not offer integrated support for answering both question types. We present a multilingual machine reading comprehension system and front-end demo that handles boolean questions by providing both a YES/NO answer and highlighting supporting evidence, and handles extractive questions by highlighting the answer in the passage. Our system, GAAMA 2.0, is ranked first on the Tydi QA leaderboard at the time of this writing. We contrast two different implementations of our approach. The first includes several independent stacks of transformers allowing easy deployment of each component. The second is a single stack of transformers utilizing adapters to reduce GPU memory footprint in a resource-constrained environment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.08441v2-abstract-full').style.display = 'none'; document.getElementById('2206.08441v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.07257">arXiv:2205.07257</a> <span> [<a href="https://arxiv.org/pdf/2205.07257">pdf</a>, <a href="https://arxiv.org/format/2205.07257">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Not to Overfit or Underfit the Source Domains? An Empirical Study of Domain Generalization in Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.07257v3-abstract-short" style="display: inline;"> Machine learning models are prone to overfitting their training (source) domains, which is commonly believed to be the reason why they falter in novel target domains. 
Here we examine the contrasting view that multi-source domain generalization (DG) is first and foremost a problem of mitigating source domain underfitting: models not adequately learning the signal already present in their multi-doma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07257v3-abstract-full').style.display = 'inline'; document.getElementById('2205.07257v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.07257v3-abstract-full" style="display: none;"> Machine learning models are prone to overfitting their training (source) domains, which is commonly believed to be the reason why they falter in novel target domains. Here we examine the contrasting view that multi-source domain generalization (DG) is first and foremost a problem of mitigating source domain underfitting: models not adequately learning the signal already present in their multi-domain training data. Experiments on a reading comprehension DG benchmark show that as a model learns its source domains better -- using familiar methods such as knowledge distillation (KD) from a bigger model -- its zero-shot out-of-domain utility improves at an even faster pace. Improved source domain learning also demonstrates superior out-of-domain generalization over three popular existing DG approaches that aim to limit overfitting. Our implementation of KD-based domain generalization is available via PrimeQA at: https://ibm.biz/domain-generalization-with-kd. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07257v3-abstract-full').style.display = 'none'; document.getElementById('2205.07257v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.11373">arXiv:2204.11373</a> <span> [<a href="https://arxiv.org/pdf/2204.11373">pdf</a>, <a href="https://arxiv.org/format/2204.11373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3477495.3531878">10.1145/3477495.3531878 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Entity-Conditioned Question Generation for Robust Attention Distribution in Neural Information Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a 
href="/search/cs?searchtype=author&query=Franz%2C+M">Martin Franz</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+H">Heng Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.11373v1-abstract-short" style="display: inline;"> We show that supervised neural information retrieval (IR) models are prone to learning sparse attention patterns over passage tokens, which can result in key phrases including named entities receiving low attention weights, eventually leading to model under-performance. Using a novel targeted synthetic data generation method that identifies poorly attended entities and conditions the generation ep… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.11373v1-abstract-full').style.display = 'inline'; document.getElementById('2204.11373v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.11373v1-abstract-full" style="display: none;"> We show that supervised neural information retrieval (IR) models are prone to learning sparse attention patterns over passage tokens, which can result in key phrases including named entities receiving low attention weights, eventually leading to model under-performance. Using a novel targeted synthetic data generation method that identifies poorly attended entities and conditions the generation episodes on those, we teach neural IR to attend more uniformly and robustly to all entities in a given passage. On two public IR benchmarks, we empirically show that the proposed method helps improve both the model's attention patterns and retrieval performance, including in zero-shot settings. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.11373v1-abstract-full').style.display = 'none'; document.getElementById('2204.11373v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at SIGIR 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.09248">arXiv:2204.09248</a> <span> [<a href="https://arxiv.org/pdf/2204.09248">pdf</a>, <a href="https://arxiv.org/ps/2204.09248">ps</a>, <a href="https://arxiv.org/format/2204.09248">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Synthetic Target Domain Supervision for Open Retrieval QA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+B">Bhavani Iyer</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rong Zhang</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Castelli%2C+V">Vittorio Castelli</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a 
href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.09248v1-abstract-short" style="display: inline;"> Neural passage retrieval is a new and promising approach in open retrieval question answering. In this work, we stress-test the Dense Passage Retriever (DPR) -- a state-of-the-art (SOTA) open domain neural retrieval model -- on closed and specialized target domains such as COVID-19, and find that it lags behind standard BM25 in this important real-world setting. To make DPR more robust under domai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09248v1-abstract-full').style.display = 'inline'; document.getElementById('2204.09248v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.09248v1-abstract-full" style="display: none;"> Neural passage retrieval is a new and promising approach in open retrieval question answering. In this work, we stress-test the Dense Passage Retriever (DPR) -- a state-of-the-art (SOTA) open domain neural retrieval model -- on closed and specialized target domains such as COVID-19, and find that it lags behind standard BM25 in this important real-world setting. To make DPR more robust under domain shift, we explore its fine-tuning with synthetic training examples, which we generate from unlabeled target domain text using a text-to-text generator. In our experiments, this noisy but fully automated target domain supervision gives DPR a sizable advantage over BM25 in out-of-domain settings, making it a more viable model in practice. Finally, an ensemble of BM25 and our improved DPR model yields the best results, further pushing the SOTA for open retrieval QA on multiple out-of-domain test sets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09248v1-abstract-full').style.display = 'none'; document.getElementById('2204.09248v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at SIGIR 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.08185">arXiv:2112.08185</a> <span> [<a href="https://arxiv.org/pdf/2112.08185">pdf</a>, <a href="https://arxiv.org/format/2112.08185">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning Cross-Lingual IR from an English Retriever </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yulong Li</a>, <a href="/search/cs?searchtype=author&query=Franz%2C+M">Martin Franz</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+B">Bhavani Iyer</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Young-Suk Lee</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.08185v3-abstract-short" 
style="display: inline;"> We present DR.DECR (Dense Retrieval with Distillation-Enhanced Cross-Lingual Representation), a new cross-lingual information retrieval (CLIR) system trained using multi-stage knowledge distillation (KD). The teacher of DR.DECR relies on a highly effective but computationally expensive two-stage inference process consisting of query translation and monolingual IR, while the student, DR.DECR, execu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.08185v3-abstract-full').style.display = 'inline'; document.getElementById('2112.08185v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.08185v3-abstract-full" style="display: none;"> We present DR.DECR (Dense Retrieval with Distillation-Enhanced Cross-Lingual Representation), a new cross-lingual information retrieval (CLIR) system trained using multi-stage knowledge distillation (KD). The teacher of DR.DECR relies on a highly effective but computationally expensive two-stage inference process consisting of query translation and monolingual IR, while the student, DR.DECR, executes a single CLIR step. We teach DR.DECR powerful multilingual representations as well as CLIR by optimizing two corresponding KD objectives. Learning useful representations of non-English text from an English-only retriever is accomplished through a cross-lingual token alignment algorithm that relies on the representation capabilities of the underlying multilingual encoders. In both in-domain and zero-shot out-of-domain evaluation, DR.DECR demonstrates far superior accuracy over direct fine-tuning with labeled CLIR data. It is also the best single-model retriever on the XOR-TyDi benchmark at the time of this writing. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.08185v3-abstract-full').style.display = 'none'; document.getElementById('2112.08185v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at NAACL 2022 main conference Code can be found at: https://github.com/primeqa/primeqa</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.07800">arXiv:2104.07800</a> <span> [<a href="https://arxiv.org/pdf/2104.07800">pdf</a>, <a href="https://arxiv.org/format/2104.07800">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust Neural Retrieval Models with Synthetic Pre-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Yadav%2C+V">Vikas Yadav</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Franz%2C+M">Martin Franz</a>, <a 
href="/search/cs?searchtype=author&query=Castelli%2C+V">Vittorio Castelli</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+H">Heng Ji</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.07800v1-abstract-short" style="display: inline;"> Recent work has shown that commonly available machine reading comprehension (MRC) datasets can be used to train high-performance neural information retrieval (IR) systems. However, the evaluation of neural IR has so far been limited to standard supervised learning settings, where they have outperformed traditional term matching baselines. We conduct in-domain and out-of-domain evaluations of neura… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.07800v1-abstract-full').style.display = 'inline'; document.getElementById('2104.07800v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.07800v1-abstract-full" style="display: none;"> Recent work has shown that commonly available machine reading comprehension (MRC) datasets can be used to train high-performance neural information retrieval (IR) systems. However, the evaluation of neural IR has so far been limited to standard supervised learning settings, where they have outperformed traditional term matching baselines. We conduct in-domain and out-of-domain evaluations of neural IR, and seek to improve its robustness across different scenarios, including zero-shot settings. We show that synthetic training examples generated using a sequence-to-sequence generator can be effective towards this goal: in our experiments, pre-training with synthetic examples improves retrieval performance in both in-domain and out-of-domain evaluation on five different test sets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.07800v1-abstract-full').style.display = 'none'; document.getElementById('2104.07800v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.01143">arXiv:2102.01143</a> <span> [<a href="https://arxiv.org/pdf/2102.01143">pdf</a>, <a href="https://arxiv.org/format/2102.01143">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICTAI50040.2020.00178">10.1109/ICTAI50040.2020.00178 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> toon2real: Translating Cartoon Images to Realistic Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+K+M+A">K. M. Arefeen Sultan</a>, <a href="/search/cs?searchtype=author&query=Jubair%2C+M+I">Mohammad Imrul Jubair</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+N">MD. 
Nahidul Islam</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+S+H">Sayed Hossain Khan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.01143v1-abstract-short" style="display: inline;"> In terms of Image-to-image translation, Generative Adversarial Networks (GANs) has achieved great success even when it is used in the unsupervised dataset. In this work, we aim to translate cartoon images to photo-realistic images using GAN. We apply several state-of-the-art models to perform this task; however, they fail to perform good quality translations. We observe that the shallow difference… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.01143v1-abstract-full').style.display = 'inline'; document.getElementById('2102.01143v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.01143v1-abstract-full" style="display: none;"> In terms of Image-to-image translation, Generative Adversarial Networks (GANs) has achieved great success even when it is used in the unsupervised dataset. In this work, we aim to translate cartoon images to photo-realistic images using GAN. We apply several state-of-the-art models to perform this task; however, they fail to perform good quality translations. We observe that the shallow difference between these two domains causes this issue. Based on this idea, we propose a method based on CycleGAN model for image translation from cartoon domain to photo-realistic domain. To make our model efficient, we implemented Spectral Normalization which added stability in our model. We demonstrate our experimental results and show that our proposed model has achieved the lowest Frechet Inception Distance score and better results compared to another state-of-the-art technique, UNIT. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.01143v1-abstract-full').style.display = 'none'; document.getElementById('2102.01143v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a short paper at ICTAI 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.01414">arXiv:2012.01414</a> <span> [<a href="https://arxiv.org/pdf/2012.01414">pdf</a>, <a href="https://arxiv.org/format/2012.01414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> End-to-End QA on COVID-19: Domain Adaptation with Synthetic Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+B">Bhavani Iyer</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rong Zhang</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avi Sil</a>, <a href="/search/cs?searchtype=author&query=Castelli%2C+V">Vittorio Castelli</a>, <a 
href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.01414v1-abstract-short" style="display: inline;"> End-to-end question answering (QA) requires both information retrieval (IR) over a large document collection and machine reading comprehension (MRC) on the retrieved passages. Recent work has successfully trained neural IR systems using only supervised question answering (QA) examples from open-domain datasets. However, despite impressive performance on Wikipedia, neural IR lags behind traditional… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.01414v1-abstract-full').style.display = 'inline'; document.getElementById('2012.01414v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.01414v1-abstract-full" style="display: none;"> End-to-end question answering (QA) requires both information retrieval (IR) over a large document collection and machine reading comprehension (MRC) on the retrieved passages. Recent work has successfully trained neural IR systems using only supervised question answering (QA) examples from open-domain datasets. However, despite impressive performance on Wikipedia, neural IR lags behind traditional term matching approaches such as BM25 in more specific and specialized target domains such as COVID-19. Furthermore, given little or no labeled data, effective adaptation of QA systems can also be challenging in such target domains. In this work, we explore the application of synthetically generated QA examples to improve performance on closed-domain retrieval and MRC. 
We combine our neural IR and MRC systems and show significant improvements in end-to-end QA on the CORD-19 collection over a state-of-the-art open-domain QA baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.01414v1-abstract-full').style.display = 'none'; document.getElementById('2012.01414v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.03435">arXiv:2011.03435</a> <span> [<a href="https://arxiv.org/pdf/2011.03435">pdf</a>, <a href="https://arxiv.org/format/2011.03435">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Answer Span Correction in Machine Reading Comprehension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Kayi%2C+E+S">Efsun Sarioglu Kayi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rong Zhang</a>, <a 
href="/search/cs?searchtype=author&query=Castelli%2C+V">Vittorio Castelli</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.03435v1-abstract-short" style="display: inline;"> Answer validation in machine reading comprehension (MRC) consists of verifying an extracted answer against an input context and question pair. Previous work has looked at re-assessing the "answerability" of the question given the extracted answer. Here we address a different problem: the tendency of existing MRC systems to produce partially correct answers when presented with answerable questions.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.03435v1-abstract-full').style.display = 'inline'; document.getElementById('2011.03435v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.03435v1-abstract-full" style="display: none;"> Answer validation in machine reading comprehension (MRC) consists of verifying an extracted answer against an input context and question pair. Previous work has looked at re-assessing the "answerability" of the question given the extracted answer. Here we address a different problem: the tendency of existing MRC systems to produce partially correct answers when presented with answerable questions. We explore the nature of such errors and propose a post-processing correction method that yields statistically significant performance improvements over state-of-the-art MRC systems in both monolingual and multilingual evaluation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.03435v1-abstract-full').style.display = 'none'; document.getElementById('2011.03435v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in Findings of EMNLP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.12776">arXiv:2010.12776</a> <span> [<a href="https://arxiv.org/pdf/2010.12776">pdf</a>, <a href="https://arxiv.org/format/2010.12776">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Improved Synthetic Training for Reading Comprehension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yanda Chen</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Castelli%2C+V">Vittorio Castelli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.12776v1-abstract-short" style="display: inline;"> Automatically generated synthetic training examples have been shown to improve performance in machine reading comprehension (MRC). 
Compared to human annotated gold standard data, synthetic training data has unique properties, such as high availability at the possible expense of quality. In view of such differences, in this paper, we explore novel applications of synthetic examples to MRC. Our prop… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.12776v1-abstract-full').style.display = 'inline'; document.getElementById('2010.12776v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.12776v1-abstract-full" style="display: none;"> Automatically generated synthetic training examples have been shown to improve performance in machine reading comprehension (MRC). Compared to human annotated gold standard data, synthetic training data has unique properties, such as high availability at the possible expense of quality. In view of such differences, in this paper, we explore novel applications of synthetic examples to MRC. Our proposed pre-training and knowledge distillation strategies show significant improvements over existing methods. In a particularly surprising discovery, we observe that synthetic distillation often yields students that can outperform the teacher model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.12776v1-abstract-full').style.display = 'none'; document.getElementById('2010.12776v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.05904">arXiv:2010.05904</a> <span> [<a href="https://arxiv.org/pdf/2010.05904">pdf</a>, <a href="https://arxiv.org/format/2010.05904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multi-Stage Pre-training for Low-Resource Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rong Zhang</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+R+G">Revanth Gangi Reddy</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Castelli%2C+V">Vittorio Castelli</a>, <a href="/search/cs?searchtype=author&query=Ferritto%2C+A">Anthony Ferritto</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Kayi%2C+E+S">Efsun Sarioglu Kayi</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a>, <a href="/search/cs?searchtype=author&query=Ward%2C+T">Todd Ward</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.05904v1-abstract-short" style="display: inline;"> Transfer learning techniques are particularly useful in NLP tasks where a sizable amount of high-quality annotated data is difficult to obtain. 
Current approaches directly adapt a pre-trained language model (LM) on in-domain text before fine-tuning to downstream tasks. We show that extending the vocabulary of the LM with domain-specific terms leads to further gains. To a bigger effect, we utilize… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05904v1-abstract-full').style.display = 'inline'; document.getElementById('2010.05904v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.05904v1-abstract-full" style="display: none;"> Transfer learning techniques are particularly useful in NLP tasks where a sizable amount of high-quality annotated data is difficult to obtain. Current approaches directly adapt a pre-trained language model (LM) on in-domain text before fine-tuning to downstream tasks. We show that extending the vocabulary of the LM with domain-specific terms leads to further gains. To a bigger effect, we utilize structure in the unlabeled data to create auxiliary synthetic tasks, which helps the LM transfer to downstream tasks. We apply these approaches incrementally on a pre-trained Roberta-large LM and show considerable performance gain on three tasks in the IT domain: Extractive Reading Comprehension, Document Ranking and Duplicate Question Detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05904v1-abstract-full').style.display = 'none'; document.getElementById('2010.05904v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.08492">arXiv:2009.08492</a> <span> [<a href="https://arxiv.org/pdf/2009.08492">pdf</a>, <a href="https://arxiv.org/format/2009.08492">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Designing knowledge plane to optimize leaf and spine data center </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mujahid Sultan</a>, <a href="/search/cs?searchtype=author&query=Imbuido%2C+D">Dodi Imbuido</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+K">Kam Patel</a>, <a href="/search/cs?searchtype=author&query=MacDonald%2C+J">James MacDonald</a>, <a href="/search/cs?searchtype=author&query=Ratnam%2C+K">Kumar Ratnam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.08492v1-abstract-short" style="display: inline;"> In the last few decades, data center architecture evolved from the traditional client-server to access-aggregation-core architectures. Recently there is a new shift in the data center architecture due to the increasing need for low latency and high throughput between server-to-server communications, load balancing and, loop-free environment. 
This new architecture, known as leaf and spine architect… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.08492v1-abstract-full').style.display = 'inline'; document.getElementById('2009.08492v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.08492v1-abstract-full" style="display: none;"> In the last few decades, data center architecture evolved from the traditional client-server to access-aggregation-core architectures. Recently there is a new shift in the data center architecture due to the increasing need for low latency and high throughput between server-to-server communications, load balancing and, loop-free environment. This new architecture, known as leaf and spine architecture, provides low latency and minimum packet loss by enabling the addition and deletion of network nodes on demand. Network nodes can be added or deleted from the network based on network statistics like link speed, packet loss, latency, and throughput. With the maturity of Open Virtual Switch (OvS) and OpenFlow based Software Defined Network (SDN) controllers, network automation through programmatic extensions has become possible based on network statistics. The separation of the control plane and data plane has enabled automated management of network and Machine Learning (ML) can be applied to learn and optimize the network. In this publication, we propose the design of an ML-based approach to gather network statistics and build a knowledge plane. We demonstrate that this knowledge plane enables data center optimization using southbound APIs and SDN controllers. We describe the design components of this approach - using a network simulator and show that it can maintain the historical patterns of network statistics to predict future growth or decline. 
We also provide an open-source software that can be utilized in a leaf and spine data center to provide elastic capacity based on load forecasts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.08492v1-abstract-full').style.display = 'none'; document.getElementById('2009.08492v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.01702">arXiv:2009.01702</a> <span> [<a href="https://arxiv.org/pdf/2009.01702">pdf</a>, <a href="https://arxiv.org/format/2009.01702">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Linking Stakeholders' Viewpoint Concerns and Microservices-based Architecture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mujahid Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.01702v4-abstract-short" style="display: inline;"> Widespread adoption of agile project management, independent delivery with microservices, and automated deployment with DevOps has tremendously speedup the systems development. The real game-changer is continuous integration (CI), continuous delivery, and continuous deployment (CD). 
Organizations can do multiple releases a day, shortening the test, release, and deployment cycles from weeks to minu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.01702v4-abstract-full').style.display = 'inline'; document.getElementById('2009.01702v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.01702v4-abstract-full" style="display: none;"> Widespread adoption of agile project management, independent delivery with microservices, and automated deployment with DevOps has tremendously speedup the systems development. The real game-changer is continuous integration (CI), continuous delivery, and continuous deployment (CD). Organizations can do multiple releases a day, shortening the test, release, and deployment cycles from weeks to minutes. Maturity of container technologies like Docker and container orchestration platforms like Kubernetes has promoted microservices architecture, especially in the cloud-native developments. Various tools are available for setting up CI/CD pipelines. Organizations are moving away from monolith applications and moving towards microservices-based architectures. Organizations can quickly accumulate hundreds of such microservices accessible via application programming interfaces (APIs). The primary purpose of these modern methodologies is agility, speed, and reusability. While DevOps offers speed and time to market, agility and reusability may not be guaranteed unless microservices and API's are linked to enterprise-wide stakeholders' needs. The link between business needs and microservices/APIs is not well captured nor adequately defined. In this publication, we describe a structured method to create a logical link among APIs and microservices-based agile developments with enterprise stakeholders' needs and viewpoint concerns. 
This method enables capturing and documenting enterprise-wide stakeholders' needs, whether these are business owners, planners (product owners, architects), designers (developers, DevOps engineers), or the partners and subscribers of an enterprise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.01702v4-abstract-full').style.display = 'none'; document.getElementById('2009.01702v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.09123">arXiv:2005.09123</a> <span> [<a href="https://arxiv.org/pdf/2005.09123">pdf</a>, <a href="https://arxiv.org/ps/2005.09123">ps</a>, <a href="https://arxiv.org/format/2005.09123">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GPT-too: A language-model-first approach for AMR-to-text generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mager%2C+M">Manuel Mager</a>, <a href="/search/cs?searchtype=author&query=Astudillo%2C+R+F">Ramon Fernandez Astudillo</a>, <a href="/search/cs?searchtype=author&query=Naseem%2C+T">Tahira Naseem</a>, <a 
href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Young-Suk Lee</a>, <a href="/search/cs?searchtype=author&query=Florian%2C+R">Radu Florian</a>, <a href="/search/cs?searchtype=author&query=Roukos%2C+S">Salim Roukos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.09123v2-abstract-short" style="display: inline;"> Meaning Representations (AMRs) are broad-coverage sentence-level semantic graphs. Existing approaches to generating text from AMR have focused on training sequence-to-sequence or graph-to-sequence models on AMR annotated data only. In this paper, we propose an alternative approach that combines a strong pre-trained language model with cycle consistency-based re-scoring. Despite the simplicity of t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.09123v2-abstract-full').style.display = 'inline'; document.getElementById('2005.09123v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.09123v2-abstract-full" style="display: none;"> Meaning Representations (AMRs) are broad-coverage sentence-level semantic graphs. Existing approaches to generating text from AMR have focused on training sequence-to-sequence or graph-to-sequence models on AMR annotated data only. In this paper, we propose an alternative approach that combines a strong pre-trained language model with cycle consistency-based re-scoring. Despite the simplicity of the approach, our experimental results show these models outperform all previous techniques on the English LDC2017T10 dataset, including the recent use of transformer architectures. In addition to the standard evaluation metrics, we provide human evaluation experiments that further substantiate the strength of our approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.09123v2-abstract-full').style.display = 'none'; document.getElementById('2005.09123v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted to the Annual Meeting of the Association for Computational Linguistics (ACL 2020)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.04372">arXiv:2003.04372</a> <span> [<a href="https://arxiv.org/pdf/2003.04372">pdf</a>, <a href="https://arxiv.org/format/2003.04372">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic Partitive Partitioning (PPP) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mujahid Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.04372v1-abstract-short" style="display: inline;"> Clustering is a NP-hard problem. Thus, no optimal algorithm exists, heuristics are applied to cluster the data. Heuristics can be very resource-intensive, if not applied properly. 
For substantially large data sets computational efficiencies can be achieved by reducing the input space if a minimal loss of information can be achieved. Clustering algorithms, in general, face two common problems: 1) t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.04372v1-abstract-full').style.display = 'inline'; document.getElementById('2003.04372v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.04372v1-abstract-full" style="display: none;"> Clustering is a NP-hard problem. Thus, no optimal algorithm exists, heuristics are applied to cluster the data. Heuristics can be very resource-intensive, if not applied properly. For substantially large data sets computational efficiencies can be achieved by reducing the input space if a minimal loss of information can be achieved. Clustering algorithms, in general, face two common problems: 1) these converge to different settings with different initial conditions and; 2) the number of clusters has to be arbitrarily decided beforehand. This problem has become critical in the realm of big data. Recently, clustering algorithms have emerged which can speedup computations using parallel processing over the grid but face the aforementioned problems. Goals: Our goals are to find methods to cluster data which: 1) guarantee convergence to the same settings irrespective of the initial conditions; 2) eliminate the need to establish the number of clusters beforehand, and 3) can be applied to cluster large datasets. Methods: We introduce a method that combines probabilistic and combinatorial clustering methods to produce repeatable and compact clusters that are not sensitive to initial conditions. 
This method harnesses the power of k-means (a combinatorial clustering method) to cluster/partition very large dimensional datasets and uses the Gaussian Mixture Model (a probabilistic clustering method) to validate the k-means partitions. Results: We show that this method produces very compact clusters that are not sensitive to initial conditions. This method can be used to identify the most 'separable' set in a dataset which increases the 'clusterability' of a dataset. This method also eliminates the need to specify the number of clusters in advance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.04372v1-abstract-full').style.display = 'none'; document.getElementById('2003.04372v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.11796">arXiv:1811.11796</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cartoon-to-real: An Approach to Translate Cartoon to Realistic Images using GAN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+K+M+A">K M Arefeen Sultan</a>, <a href="/search/cs?searchtype=author&query=Rupty%2C+L+K">Labiba Kanij Rupty</a>, <a href="/search/cs?searchtype=author&query=Pranto%2C+N+I">Nahidul Islam Pranto</a>, <a href="/search/cs?searchtype=author&query=Shuvo%2C+S+K">Sayed Khan Shuvo</a>, <a href="/search/cs?searchtype=author&query=Jubair%2C+M+I">Mohammad Imrul Jubair</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.11796v3-abstract-short" style="display: inline;"> We propose a method to translate cartoon images to real world images using Generative Aderserial Network (GAN). Existing GAN-based image-to-image translation methods which are trained on paired datasets are impractical as the data is difficult to accumulate. 
Therefore, in this paper we exploit the Cycle-Consistent Adversarial Networks (CycleGAN) method for images translation which needs an unpaire… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.11796v3-abstract-full').style.display = 'inline'; document.getElementById('1811.11796v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.11796v3-abstract-full" style="display: none;"> We propose a method to translate cartoon images to real world images using Generative Aderserial Network (GAN). Existing GAN-based image-to-image translation methods which are trained on paired datasets are impractical as the data is difficult to accumulate. Therefore, in this paper we exploit the Cycle-Consistent Adversarial Networks (CycleGAN) method for images translation which needs an unpaired dataset. By applying CycleGAN we show that our model is able to generate meaningful real world images from cartoon images. However, we implement another state of the art technique $-$ Deep Analogy $-$ to compare the performance of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.11796v3-abstract-full').style.display = 'none'; document.getElementById('1811.11796v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is an ongoing work and this draft contains the future plan to accomplish the tasks</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1802.10548">arXiv:1802.10548</a> <span> [<a href="https://arxiv.org/pdf/1802.10548">pdf</a>, <a href="https://arxiv.org/format/1802.10548">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Using Deep Learning for Segmentation and Counting within Microscopy Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hern%C3%A1ndez%2C+C+X">Carlos X. Hernández</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+M">Mohammad M. Sultan</a>, <a href="/search/cs?searchtype=author&query=Pande%2C+V+S">Vijay S. Pande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1802.10548v1-abstract-short" style="display: inline;"> Cell counting is a ubiquitous, yet tedious task that would greatly benefit from automation. From basic biological questions to clinical trials, cell counts provide key quantitative feedback that drive research. Unfortunately, cell counting is most commonly a manual task and can be time-intensive. 
The task is made even more difficult due to overlapping cells, existence of multiple focal planes, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.10548v1-abstract-full').style.display = 'inline'; document.getElementById('1802.10548v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1802.10548v1-abstract-full" style="display: none;"> Cell counting is a ubiquitous, yet tedious task that would greatly benefit from automation. From basic biological questions to clinical trials, cell counts provide key quantitative feedback that drive research. Unfortunately, cell counting is most commonly a manual task and can be time-intensive. The task is made even more difficult due to overlapping cells, existence of multiple focal planes, and poor imaging quality, among other factors. Here, we describe a convolutional neural network approach, using a recently described feature pyramid network combined with a VGG-style neural network, for segmenting and subsequent counting of cells in a given microscopy image. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.10548v1-abstract-full').style.display = 'none'; document.getElementById('1802.10548v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2018. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1802.10510">arXiv:1802.10510</a> <span> [<a href="https://arxiv.org/pdf/1802.10510">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Automated design of collective variables using supervised machine learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+M">Mohammad M. Sultan</a>, <a href="/search/cs?searchtype=author&query=Pande%2C+V+S">Vijay S. Pande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1802.10510v2-abstract-short" style="display: inline;"> Selection of appropriate collective variables for enhancing sampling of molecular simulations remains an unsolved problem in computational biophysics. In particular, picking initial collective variables (CVs) is particularly challenging in higher dimensions. Which atomic coordinates or transforms there of from a list of thousands should one pick for enhanced sampling runs? 
How does a modeler even… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.10510v2-abstract-full').style.display = 'inline'; document.getElementById('1802.10510v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1802.10510v2-abstract-full" style="display: none;"> Selection of appropriate collective variables for enhancing sampling of molecular simulations remains an unsolved problem in computational biophysics. In particular, picking initial collective variables (CVs) is particularly challenging in higher dimensions. Which atomic coordinates or transforms thereof from a list of thousands should one pick for enhanced sampling runs? How does a modeler even begin to pick starting coordinates for investigation? This remains true even in the case of simple two state systems and only increases in difficulty for multi-state systems. In this work, we solve the initial CV problem using a data-driven approach inspired by the field of supervised machine learning. In particular, we show how the decision functions in supervised machine learning (SML) algorithms can be used as initial CVs (SML_cv) for accelerated sampling. Using solvated alanine dipeptide and Chignolin mini-protein as our test cases, we illustrate how the distance to the Support Vector Machines' decision hyperplane, the output probability estimates from Logistic Regression, the outputs from deep neural network classifiers, and other classifiers may be used to reversibly sample slow structural transitions. We discuss the utility of other SML algorithms that might be useful for identifying CVs for accelerating molecular simulations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.10510v2-abstract-full').style.display = 'none'; document.getElementById('1802.10510v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1706.06322">arXiv:1706.06322</a> <span> [<a href="https://arxiv.org/pdf/1706.06322">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Evaluation of energy consumption of reactive and proactive routing protocols in MANET </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M+T">Mohamad T. Sultan</a>, <a href="/search/cs?searchtype=author&query=Zaki%2C+S+M">Salim M. Zaki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1706.06322v1-abstract-short" style="display: inline;"> Mobile Ad hoc Network (MANET) is a distributed, infrastructure-less and decentralized network. A routing protocol in MANET is used to find routes between mobile nodes to facilitate communication within the network. Numerous routing protocols have been proposed for MANET. 
Those routing protocols are designed to adaptively accommodate for dynamic unpredictable changes in network's topology. The mobi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1706.06322v1-abstract-full').style.display = 'inline'; document.getElementById('1706.06322v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1706.06322v1-abstract-full" style="display: none;"> Mobile Ad hoc Network (MANET) is a distributed, infrastructure-less and decentralized network. A routing protocol in MANET is used to find routes between mobile nodes to facilitate communication within the network. Numerous routing protocols have been proposed for MANET. Those routing protocols are designed to adaptively accommodate for dynamic unpredictable changes in network's topology. The mobile nodes in MANET are often powered by limited batteries and network lifetime relies heavily on the energy consumption of nodes. In consequence, the lack of a mobile node can lead to network partitioning. In this paper we analyse, evaluate and measure the energy efficiency of three prominent MANET routing protocols namely DSR, AODV and OLSR in addition to modified protocols. These routing protocols follow the reactive and the proactive routing schemes. A discussion and comparison highlighting their particular merits and drawbacks are also presented. Evaluation study and simulations are performed using NS-2 and its accompanying tools for analysis and investigation of results. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1706.06322v1-abstract-full').style.display = 'none'; document.getElementById('1706.06322v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1509.07360">arXiv:1509.07360</a> <span> [<a href="https://arxiv.org/pdf/1509.07360">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/RePa.2015.7407731">10.1109/RePa.2015.7407731 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Ordering stakeholder viewpoint concerns for holistic and incremental Enterprise Architecture: the W6H framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mujahid Sultan</a>, <a href="/search/cs?searchtype=author&query=Miranskyy%2C+A">Andriy Miranskyy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1509.07360v1-abstract-short" style="display: inline;"> Context: Enterprise Architecture (EA) is a discipline which has evolved to structure the business and its alignment with the IT systems. One of the popular enterprise architecture frameworks is Zachman framework (ZF). 
This framework focuses on describing the enterprise from six viewpoint perspectives of the stakeholders. These six perspectives are based on English language interrogatives 'what', '… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1509.07360v1-abstract-full').style.display = 'inline'; document.getElementById('1509.07360v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1509.07360v1-abstract-full" style="display: none;"> Context: Enterprise Architecture (EA) is a discipline which has evolved to structure the business and its alignment with the IT systems. One of the popular enterprise architecture frameworks is Zachman framework (ZF). This framework focuses on describing the enterprise from six viewpoint perspectives of the stakeholders. These six perspectives are based on English language interrogatives 'what', 'where', 'who', 'when', 'why', and 'how' (thus the term W5H). Journalists and police investigators use the W5H to describe an event. However, EA is not an event, making creation and evolution of EA challenging. Moreover, the ordering of viewpoints is not defined in the existing EA frameworks, making data capturing process difficult. Our goals are to 1) assess if W5H is sufficient to describe modern EA and 2) explore the ordering and precedence among the viewpoint concerns. Method: we achieve our goals by bringing tools from the Linguistics, focusing on a full set of English Language interrogatives to describe viewpoint concerns and the inter-relationships and dependencies among these. Application of these tools is validated using pedagogical EA examples. Results: 1) We show that addition of the seventh interrogative 'which' to the W5H set (we denote this extended set as W6H) yields extra and necessary information enabling creation of holistic EA. 
2) We discover that particular ordering of the interrogatives, established by linguists (based on semantic and lexical analysis of English language interrogatives), define starting points and the order in which viewpoints should be arranged for creating complete EA. 3) We prove that adopting W6H enables creation of EA for iterative and agile SDLCs, e.g. Scrum. Conclusions: We believe that our findings complete creation of EA using ZF by practitioners, and provide theoreticians with tools needed to improve other EA frameworks, e.g., TOGAF and DoDAF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1509.07360v1-abstract-full').style.display = 'none'; document.getElementById('1509.07360v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Due to ArXiv constraint 'The abstract field cannot be longer than 1,920 characters', the abstract appearing here is shorter than the one in the manuscript</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proc. of IEEE Fifth International Workshop on Requirements Patterns (RePa), 2015, pp. 
1-8 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1508.01954">arXiv:1508.01954</a> <span> [<a href="https://arxiv.org/pdf/1508.01954">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/RePa.2015.7407731">10.1109/RePa.2015.7407731 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Ordering Interrogative Questions for Effective Requirements Engineering: The W6H Pattern </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sultan%2C+M">Mujahid Sultan</a>, <a href="/search/cs?searchtype=author&query=Miranskyy%2C+A">Andriy Miranskyy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1508.01954v1-abstract-short" style="display: inline;"> Requirements elicitation and requirements analysis are important practices of Requirements Engineering. Elicitation techniques, such as interviews and questionnaires, rely on formulating interrogative questions and asking these in a proper order to maximize the accuracy of the information being gathered. 
Information gathered during requirements elicitation then has to be interpreted, analyzed, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1508.01954v1-abstract-full').style.display = 'inline'; document.getElementById('1508.01954v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1508.01954v1-abstract-full" style="display: none;"> Requirements elicitation and requirements analysis are important practices of Requirements Engineering. Elicitation techniques, such as interviews and questionnaires, rely on formulating interrogative questions and asking these in a proper order to maximize the accuracy of the information being gathered. Information gathered during requirements elicitation then has to be interpreted, analyzed, and validated. Requirements analysis involves analyzing the problem and solutions spaces. In this paper, we describe a method to formulate interrogative questions for effective requirements elicitation based on the lexical and semantic principles of the English language interrogatives, and propose a pattern to organize stakeholder viewpoint concerns for better requirements analysis. This helps requirements engineer thoroughly describe problem and solutions spaces. Most of the previous requirements elicitation studies included six out of the seven English language interrogatives 'what', 'where', 'when', 'who', 'why', and 'how' (denoted by W5H) and did not propose any order in the interrogatives. We show that extending the set of six interrogatives with 'which' (denoted by W6H) improves the generation and formulation of questions for requirements elicitation and facilitates better requirements analysis via arranging stakeholder views. We discuss the interdependencies among interrogatives (for requirements engineer to consider while eliciting the requirements) and suggest an order for the set of W6H interrogatives. 
The proposed W6H-based reusable pattern also aids requirements engineer in organizing viewpoint concerns of stakeholders, making this pattern an effective tool for requirements analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1508.01954v1-abstract-full').style.display = 'none'; document.getElementById('1508.01954v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In Proceedings of the 2015 IEEE Fifth International Workshop on Requirements Patterns (RePa) (REPA '15). IEEE Computer Society, Washington, DC, USA, 1-8 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1302.2748">arXiv:1302.2748</a> <span> [<a href="https://arxiv.org/pdf/1302.2748">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> A Systematic Literature Review on relationship between agile methods and Open Source Software Development methodology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gandomani%2C+T+J">Taghi Javdani Gandomani</a>, <a href="/search/cs?searchtype=author&query=Zulzalil%2C+H">Hazura Zulzalil</a>, <a href="/search/cs?searchtype=author&query=Ghani%2C+A+A+A">Abdul Azim Abdul Ghani</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+A+B+M">Abu Bakar Md Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="1302.2748v1-abstract-short" style="display: inline;"> Agile software development methods (ASD) and open source software development methods (OSSD) are two different approaches which were introduced in last decade and both of them have their fanatical advocators. Yet, it seems that relation and interface between ASD and OSSD is a fertile area and few rigorous studies have been done in this matter. Major goal of this study was assessment of the relatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1302.2748v1-abstract-full').style.display = 'inline'; document.getElementById('1302.2748v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1302.2748v1-abstract-full" style="display: none;"> Agile software development methods (ASD) and open source software development methods (OSSD) are two different approaches which were introduced in last decade and both of them have their fanatical advocators. Yet, it seems that relation and interface between ASD and OSSD is a fertile area and few rigorous studies have been done in this matter. Major goal of this study was assessment of the relation and integration of ASD and OSSD. Analyzing of collected data shows that ASD and OSSD are able to support each other. Some practices in one of them are useful in the other. Another finding is that however there are some case studies using ASD and OSSD simultaneously, but there is not enough evidence about comprehensive integration of them. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1302.2748v1-abstract-full').style.display = 'none'; document.getElementById('1302.2748v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 5 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Taghi Javdani Gandomani, at al., A systematic literature review on relationship between agile SD and open source SD, International review on computers and software (IRECOS), 2012, Vol. 7, Issue 4, pp. 1602-1607 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1302.2747">arXiv:1302.2747</a> <span> [<a href="https://arxiv.org/pdf/1302.2747">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Effective factors in agile transformation process from change management perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gandomani%2C+T+J">Taghi Javdani Gandomani</a>, <a href="/search/cs?searchtype=author&query=Zulzalil%2C+H">Hazura Zulzalil</a>, <a href="/search/cs?searchtype=author&query=Ghani%2C+A+A+A">Abdul Azim Abdul Ghani</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+A+B+M">Abu Bakar Md. 
Sultan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1302.2747v1-abstract-short" style="display: inline;"> After introducing agile approach in 2001, several agile methods were founded over the last decade. Agile values such as customer collaboration, embracing changes, iteration and frequent delivery, continuous integration, etc. motivate all software stakeholders to use these methods in their projects. The main issue is that for using these methods instead of traditional methods in software developmen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1302.2747v1-abstract-full').style.display = 'inline'; document.getElementById('1302.2747v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1302.2747v1-abstract-full" style="display: none;"> After introducing agile approach in 2001, several agile methods were founded over the last decade. Agile values such as customer collaboration, embracing changes, iteration and frequent delivery, continuous integration, etc. motivate all software stakeholders to use these methods in their projects. The main issue is that for using these methods instead of traditional methods in software development, companies should change their approach from traditional to agile. This change is a fundamental and critical mutation. Several studies have been done for investigating of barriers, challenges and issues in agile movement process and also in how to use agile methods in companies. The main issue is altering attitude from traditional to agile approach. We believe that before managing agile transformation process, its related factors should be studied in deep. This study focuses on different dimensions of changing approach to agile from change management perspective. 
These factors are how to be agile, method selection, and awareness of challenges and issues. These fundamental factors encompass many items for agile movement and adoption process. Although these factors may change in different organizations, they should be studied in depth before any action plan for designing a change strategy. The main contribution of this paper is introducing these factors and discussing them in depth. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1302.2747v1-abstract-full').style.display = 'none'; document.getElementById('1302.2747v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Taghi Javdani Gandomani, et al., Effective factors in agile transformation process from change management perspective, 2nd Int. Conf. on Advance Information System, E-Education & Development (CAISED 2013), Jan. 
2013, Kuala Lumpur, Malaysia </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Sultan%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Sultan%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Sultan%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 
9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>