Showing 1–20 of 20 results for author: Fahim, M

Searching in archive cs. Results sorted by announcement date (newest first).
1. arXiv:2410.15017 [pdf, other] (https://arxiv.org/abs/2410.15017)
Subjects: cs.CL; cs.AI; cs.SD; eess.AS
Title: DM-Codec: Distilling Multimodal Representations for Speech Tokenization
Authors: Md Mubtasim Ahasan, Md Fahim, Tasnim Mohiuddin, A K M Mahbubur Rahman, Aman Chadha, Tariq Iqbal, M Ashraful Amin, Md Mofijul Islam, Amin Ahsan Ali
Abstract: Recent advancements in speech-language models have yielded significant improvements in speech tokenization and synthesis. However, effectively mapping the complex, multidimensional attributes of speech into discrete tokens remains challenging. This process demands acoustic, semantic, and contextual information for precise speech representations. Existing speech representations generally fall into two categories: acoustic tokens from audio codecs and semantic tokens from speech self-supervised learning models. Although recent efforts have unified acoustic and semantic tokens for improved performance, they overlook the crucial role of contextual representation in comprehensive speech modeling. Our empirical investigations reveal that the absence of contextual representations results in elevated Word Error Rate (WER) and Word Information Lost (WIL) scores in speech transcriptions. To address these limitations, we propose two novel distillation approaches: (1) a language model (LM)-guided distillation method that incorporates contextual information, and (2) a combined LM and self-supervised speech model (SM)-guided distillation technique that effectively distills multimodal representations (acoustic, semantic, and contextual) into a comprehensive speech tokenizer, termed DM-Codec. The DM-Codec architecture adopts a streamlined encoder-decoder framework with a Residual Vector Quantizer (RVQ) and incorporates the LM and SM during the training process. Experiments show DM-Codec significantly outperforms state-of-the-art speech tokenization models, reducing WER by up to 13.46%, WIL by 9.82%, and improving speech quality by 5.84% and intelligibility by 1.85% on the LibriSpeech benchmark dataset. The code, samples, and model checkpoints are available at https://github.com/mubtasimahasan/DM-Codec.
Submitted 19 October, 2024; originally announced October 2024.
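Note: the abstract above describes distilling LM and SM representations into an RVQ-based codec during training. The sketch below is only a rough illustration of such a combined objective; the function names, the L1/MSE loss choices, and the assumption that the codec exposes a single latent to align are placeholders, not the released DM-Codec code.

```python
import torch.nn.functional as F

def combined_distillation_loss(codec, audio, lm_repr, sm_repr, w_lm=1.0, w_sm=1.0):
    # codec(audio) is assumed (hypothetically) to return (reconstruction, latent)
    recon, latent = codec(audio)
    loss_recon = F.l1_loss(recon, audio)             # waveform reconstruction term
    loss_lm = F.mse_loss(latent, lm_repr.detach())   # contextual (LM-guided) distillation
    loss_sm = F.mse_loss(latent, sm_repr.detach())   # semantic (SM-guided) distillation
    return loss_recon + w_lm * loss_lm + w_sm * loss_sm
```

The teacher representations are detached so that gradients flow only into the codec, which is the usual convention in distillation setups.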
2. arXiv:2410.14991 [pdf] (https://arxiv.org/abs/2410.14991)
Subjects: cs.CV; cs.CL
Title: ChitroJera: A Regionally Relevant Visual Question Answering Dataset for Bangla
Authors: Deeparghya Dutta Barua, Md Sakib Ul Rahman Sourove, Md Farhan Ishmam, Fabiha Haider, Fariha Tanjim Shifat, Md Fahim, Md Farhad Alam
Abstract: Visual Question Answer (VQA) poses the problem of answering a natural language question about a visual context. Bangla, despite being a widely spoken language, is considered low-resource in the realm of VQA due to the lack of a proper benchmark dataset. The absence of such datasets challenges models that are known to be performant in other languages. Furthermore, existing Bangla VQA datasets offer little cultural relevance and are largely adapted from their foreign counterparts. To address these challenges, we introduce a large-scale Bangla VQA dataset titled ChitroJera, totaling over 15k samples where diverse and locally relevant data sources are used. We assess the performance of text encoders, image encoders, multimodal models, and our novel dual-encoder models. The experiments reveal that the pre-trained dual-encoders outperform other models of their scale. We also evaluate the performance of large language models (LLMs) using prompt-based techniques, with LLMs achieving the best performance. Given the underdeveloped state of existing datasets, we envision ChitroJera expanding the scope of Vision-Language tasks in Bangla.
Submitted 19 October, 2024; originally announced October 2024.

3. arXiv:2410.13281 [pdf, other] (https://arxiv.org/abs/2410.13281)
Subjects: cs.CL
Title: BanTH: A Multi-label Hate Speech Detection Dataset for Transliterated Bangla
Authors: Fabiha Haider, Fariha Tanjim Shifat, Md Farhan Ishmam, Deeparghya Dutta Barua, Md Sakib Ul Rahman Sourove, Md Fahim, Md Farhad Alam
Abstract: The proliferation of transliterated texts in digital spaces has emphasized the need for detecting and classifying hate speech in languages beyond English, particularly in low-resource languages. As online discourse can perpetuate discrimination based on target groups, e.g. gender, religion, and origin, multi-label classification of hateful content can help in comprehending hate motivation and enhance content moderation. While previous efforts have focused on monolingual or binary hate classification tasks, no work has yet addressed the challenge of multi-label hate speech classification in transliterated Bangla. We introduce BanTH, the first multi-label transliterated Bangla hate speech dataset, comprising 37.3k samples. The samples are sourced from YouTube comments, where each instance is labeled with one or more target groups, reflecting the regional demographic. We establish novel transformer encoder-based baselines by further pre-training on a transliterated Bangla corpus. We also propose a novel translation-based LLM prompting strategy for transliterated text. Experiments reveal that our further pre-trained encoders achieve state-of-the-art performance on the BanTH dataset, while our translation-based prompting outperforms other strategies in the zero-shot setting. The introduction of BanTH not only fills a critical gap in hate speech research for Bangla but also sets the stage for future exploration into code-mixed and multi-label classification challenges in underrepresented languages.
Submitted 24 October, 2024; v1 submitted 17 October, 2024; originally announced October 2024.

4. arXiv:2310.13269 [pdf, other] (https://arxiv.org/abs/2310.13269)
Subjects: cs.LG; cs.AI; cs.IR
Title: An Exploratory Study on Simulated Annealing for Feature Selection in Learning-to-Rank
Authors: Mohd. Sayemul Haque, Md. Fahim, Muhammad Ibrahim
Abstract: Learning-to-rank is an applied domain of supervised machine learning. As feature selection has been found to be effective for improving the accuracy of learning models in general, it is intriguing to investigate this process for the learning-to-rank domain. In this study, we investigate the use of a popular meta-heuristic approach called simulated annealing for this task. Under the general framework of simulated annealing, we explore various neighborhood selection strategies and temperature cooling schemes. We further introduce a new hyper-parameter called the progress parameter that can effectively be used to traverse the search space. Our algorithms are evaluated on five public benchmark datasets of learning-to-rank. For a better validation, we also compare the simulated annealing-based feature selection algorithm with another effective meta-heuristic algorithm, namely local beam search. Extensive experimental results show the efficacy of our proposed models.
Submitted 20 October, 2023; originally announced October 2023.
Comments: 29 pages
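Note: the entry above explores neighborhood selection strategies and temperature cooling schemes. The following is a generic simulated-annealing sketch for feature-subset search in that spirit; the flip-one-feature neighborhood, geometric cooling, and the `evaluate` scorer (e.g. NDCG of a ranker trained on the selected features) are illustrative assumptions, and the paper's progress parameter is not modeled.

```python
import math
import random

def simulated_annealing(n_features, evaluate, t0=1.0, cooling=0.95, steps=200):
    # start from a random feature subset (True = feature kept)
    current = [random.random() < 0.5 for _ in range(n_features)]
    best, best_score = current[:], evaluate(current)
    score, t = best_score, t0
    for _ in range(steps):
        neighbor = current[:]
        i = random.randrange(n_features)
        neighbor[i] = not neighbor[i]                 # flip one feature in or out
        new_score = evaluate(neighbor)
        # always accept improvements; accept worse subsets with temperature-dependent probability
        if new_score > score or random.random() < math.exp((new_score - score) / t):
            current, score = neighbor, new_score
            if score > best_score:
                best, best_score = current[:], score
        t *= cooling                                  # geometric cooling schedule
    return best, best_score
```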
5. arXiv:2308.05068 [pdf, other] (https://arxiv.org/abs/2308.05068)
Subjects: eess.IV; cs.CV
Title: Geometric Learning-Based Transformer Network for Estimation of Segmentation Errors
Authors: Sneha Sree C, Mohammad Al Fahim, Keerthi Ram, Mohanasankar Sivaprakasam
Abstract: Many segmentation networks have been proposed for 3D volumetric segmentation of tumors and organs at risk. Hospitals and clinical institutions seek to accelerate and minimize the efforts of specialists in image segmentation. Still, in case of errors generated by these networks, clinicians would have to manually edit the generated segmentation maps. Given a 3D volume and its putative segmentation map, we propose an approach to identify and measure erroneous regions in the segmentation map. Our method can estimate error at any point or node in a 3D mesh generated from a possibly erroneous volumetric segmentation map, serving as a Quality Assurance tool. We propose a graph neural network-based transformer based on the Nodeformer architecture to measure and classify the segmentation errors at any point. We have evaluated our network on a high-resolution micro-CT dataset of the human inner-ear bony labyrinth structure by simulating erroneous 3D segmentation maps. Our network incorporates a convolutional encoder to compute node-centric features from the input micro-CT data, the Nodeformer to learn the latent graph embeddings, and a Multi-Layer Perceptron (MLP) to compute and classify the node-wise errors. Our network achieves a mean absolute error of ~0.042 and an accuracy of 79.53%, outperforming other Graph Neural Networks (GNNs) in estimating and classifying the node-wise errors, respectively. We also put forth vertex-normal prediction as a custom pretext task for pre-training the CNN encoder to improve the network's overall performance. Qualitative analysis shows the efficiency of our network in correctly classifying errors and reducing misclassifications.
Submitted 10 August, 2023; v1 submitted 9 August, 2023; originally announced August 2023.
Comments: Accepted in MICCAI workshop on ShapeMI, 2023

6. arXiv:2308.04821 [pdf, other] (https://arxiv.org/abs/2308.04821)
Subjects: eess.IV; cs.CV
Title: HyperCoil-Recon: A Hypernetwork-based Adaptive Coil Configuration Task Switching Network for MRI Reconstruction
Authors: Sriprabha Ramanarayanan, Mohammad Al Fahim, Rahul G. S., Amrit Kumar Jethi, Keerthi Ram, Mohanasankar Sivaprakasam
Abstract: Parallel imaging, a fast MRI technique, involves dynamic adjustments based on the configuration, i.e., number, positioning, and sensitivity of the coils with respect to the anatomy under study. Conventional deep learning-based image reconstruction models have to be trained or fine-tuned for each configuration, posing a barrier to clinical translation, given the lack of computational resources and machine learning expertise for clinicians to train models at deployment. Joint training on diverse datasets learns a single weight set that might underfit to deviated configurations. We propose HyperCoil-Recon, a hypernetwork-based coil configuration task-switching network for multi-coil MRI reconstruction that encodes varying configurations of the numbers of coils in a multi-tasking perspective, posing each configuration as a task. The hypernetworks infer and embed task-specific weights into the reconstruction network, 1) effectively utilizing the contextual knowledge of common and varying image features among the various fields-of-view of the coils, and 2) enabling generality to unseen configurations at test time. Experiments reveal that our approach 1) adapts on the fly to various unseen configurations up to 32 coils when trained on lower numbers (i.e., 7 to 11) of randomly varying coils, and to 120 deviated unseen configurations when trained on 18 configurations in a single model, 2) matches the performance of coil configuration-specific models, and 3) outperforms configuration-invariant models with improvement margins of around 1 dB / 0.03 and 0.3 dB / 0.02 in PSNR / SSIM for knee and brain data. Our code is available at https://github.com/sriprabhar/HyperCoil-Recon
Submitted 9 August, 2023; originally announced August 2023.
Comments: Accepted at the ICCV 2023 Workshop on Computer Vision for Automated Medical Diagnosis (CVAMD), 8 pages, 2 columns
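Note: as a toy illustration of the hypernetwork idea in the entry above (not the released HyperCoil-Recon code), the sketch below maps a coil-configuration embedding to the kernel of a single convolution, so one model can serve different configurations. The embedding size, layer widths, and two-channel input are arbitrary assumptions.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class HyperConv(nn.Module):
    def __init__(self, cfg_dim=8, in_ch=2, out_ch=32, k=3):
        super().__init__()
        self.in_ch, self.out_ch, self.k = in_ch, out_ch, k
        # hypernetwork: configuration embedding -> flattened conv kernel
        self.hyper = nn.Sequential(
            nn.Linear(cfg_dim, 64), nn.ReLU(),
            nn.Linear(64, out_ch * in_ch * k * k),
        )

    def forward(self, x, cfg):
        # cfg: (cfg_dim,) vector encoding e.g. the number/arrangement of coils
        w = self.hyper(cfg).view(self.out_ch, self.in_ch, self.k, self.k)
        return F.conv2d(x, w, padding=self.k // 2)

x = torch.randn(1, 2, 64, 64)        # toy undersampled image (real/imag channels)
cfg = torch.randn(8)                 # toy coil-configuration embedding
print(HyperConv()(x, cfg).shape)     # torch.Size([1, 32, 64, 64])
```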
7. arXiv:2308.04262 [pdf, other] (https://arxiv.org/abs/2308.04262)
Subjects: eess.IV; cs.CV; cs.LG
Title: SDLFormer: A Sparse and Dense Locality-enhanced Transformer for Accelerated MR Image Reconstruction
Authors: Rahul G. S., Sriprabha Ramnarayanan, Mohammad Al Fahim, Keerthi Ram, Preejith S. P, Mohanasankar Sivaprakasam
Abstract: Transformers have emerged as viable alternatives to convolutional neural networks owing to their ability to learn non-local region relationships in the spatial domain. The self-attention mechanism of the transformer enables transformers to capture long-range dependencies in the images, which might be desirable for accelerated MRI image reconstruction as the effect of undersampling is non-local in the image domain. Despite their computational efficiency, window-based transformers suffer from restricted receptive fields as the dependencies are limited to within the scope of the image windows. We propose a window-based transformer network that integrates a dilated attention mechanism and convolution for accelerated MRI image reconstruction. The proposed network consists of dilated and dense neighborhood attention transformers to enhance the distant neighborhood pixel relationship and introduces depth-wise convolutions within the transformer module to learn low-level translation-invariant features for accelerated MRI image reconstruction. The proposed model is trained in a self-supervised manner. We perform extensive experiments for multi-coil MRI acceleration for coronal PD, coronal PDFS and axial T2 contrasts with 4x and 5x under-sampling in self-supervised learning based on k-space splitting. We compare our method against other reconstruction architectures and the parallel domain self-supervised learning baseline. Results show that the proposed model exhibits improvement margins of (i) around 1.40 dB in PSNR and around 0.028 in SSIM on average over other architectures and (ii) around 1.44 dB in PSNR and around 0.029 in SSIM over parallel domain self-supervised learning. The code is available at https://github.com/rahul-gs-16/sdlformer.git
Submitted 8 August, 2023; originally announced August 2023.
Comments: Accepted at MICCAI workshop MILLanD 2023 (Medical Image Learning with Noisy and Limited Data)

8. arXiv:2304.05057 [pdf, other] (https://arxiv.org/abs/2304.05057)
Subjects: eess.IV; cs.AI; cs.CV
Title: SFT-KD-Recon: Learning a Student-friendly Teacher for Knowledge Distillation in Magnetic Resonance Image Reconstruction
Authors: Matcha Naga Gayathri, Sriprabha Ramanarayanan, Mohammad Al Fahim, Rahul G S, Keerthi Ram, Mohanasankar Sivaprakasam
Abstract: Deep cascaded architectures for magnetic resonance imaging (MRI) acceleration have shown remarkable success in providing high-quality reconstruction. However, as the number of cascades increases, the improvements in reconstruction tend to become marginal, indicating possible excess model capacity. Knowledge distillation (KD) is an emerging technique to compress these models, in which a trained deep teacher network is used to distill knowledge to a smaller student network such that the student learns to mimic the behavior of the teacher. Most KD methods focus on effectively training the student with a pre-trained teacher that is unaware of the student model. We propose SFT-KD-Recon, a student-friendly teacher training approach along with the student as a prior step to KD, to make the teacher aware of the structure and capacity of the student and enable aligning the representations of the teacher with the student. In SFT, the teacher is jointly trained with the unfolded branch configurations of the student blocks using three loss terms: teacher-reconstruction loss, student-reconstruction loss, and teacher-student imitation loss, followed by KD of the student. We perform extensive experiments for MRI acceleration in 4x and 5x under-sampling on the brain and cardiac datasets on five KD methods using the proposed approach as a prior step. We consider the DC-CNN architecture and set up the teacher as D5C5 (141,765 parameters) and the student as D3C5 (49,285 parameters), denoting a compression of 2.87:1. Results show that (i) our approach consistently improves the KD methods with improved reconstruction performance and image quality, and (ii) the student distilled using our approach is competitive with the teacher, with the performance gap reduced from 0.53 dB to 0.03 dB.
Submitted 11 April, 2023; originally announced April 2023.
Comments: 18 pages, 8 figures. Accepted for publication at MIDL 2023. Code for our proposed method is available at https://github.com/GayathriMatcha/SFT-KD-Recon
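Note: the SFT step above is described as joint training with three loss terms. A minimal sketch of such an objective, assuming L1 reconstruction losses and placeholder weights (not the authors' implementation), could look like:

```python
import torch.nn.functional as F

def sft_objective(teacher_out, student_out, target, alpha=1.0, beta=1.0, gamma=1.0):
    loss_teacher = F.l1_loss(teacher_out, target)       # teacher-reconstruction loss
    loss_student = F.l1_loss(student_out, target)       # student-reconstruction loss
    loss_imitate = F.l1_loss(teacher_out, student_out)  # teacher-student imitation loss
    return alpha * loss_teacher + beta * loss_student + gamma * loss_imitate
```

After this student-aware teacher training, the usual KD stage would train the student against the frozen teacher.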
9. arXiv:2302.09825 [pdf, other] (https://arxiv.org/abs/2302.09825)
Subjects: cs.CV
Title: TBPos: Dataset for Large-Scale Precision Visual Localization
Authors: Masud Fahim, Ilona Söchting, Luca Ferranti, Juho Kannala, Jani Boutellier
Abstract: Image based localization is a classical computer vision challenge, with several well-known datasets. Generally, datasets consist of a visual 3D database that captures the modeled scenery, as well as query images whose 3D pose is to be discovered. Usually the query images have been acquired with a camera that differs from the imaging hardware used to collect the 3D database; consequently, it is hard to acquire accurate ground truth poses between query images and the 3D database. As the accuracy of visual localization algorithms constantly improves, precise ground truth becomes increasingly important. This paper proposes TBPos, a novel large-scale visual dataset for image based positioning, which provides query images with fully accurate ground truth poses: both the database images and the query images have been derived from the same laser scanner data. In the experimental part of the paper, the proposed dataset is evaluated by means of an image-based localization pipeline.
Submitted 20 February, 2023; originally announced February 2023.
Comments: Scandinavian Conference on Image Analysis 2023

10. arXiv:2211.09751 [pdf, other] (https://arxiv.org/abs/2211.09751)
Subjects: cs.SD; cs.AI; eess.AS; physics.med-ph; q-bio.QM
Title: Heart Abnormality Detection from Heart Sound Signals using MFCC Feature and Dual Stream Attention Based Network
Authors: Nayeeb Rashid, Swapnil Saha, Mohseu Rashid Subah, Rizwan Ahmed Robin, Syed Mortuza Hasan Fahim, Shahed Ahmed, Talha Ibn Mahmud
Abstract: Cardiovascular diseases are one of the leading causes of death in today's world, and early screening of heart condition plays a crucial role in preventing them. The heart sound signal is one of the primary indicators of heart condition and can be used to detect abnormality in the heart. The acquisition of the heart sound signal is non-invasive, cost-effective and requires minimal equipment. But currently the detection of heart abnormality from the heart sound signal depends largely on the expertise and experience of the physician. As such, an automatic detection system for heart abnormality detection from heart sound signals can be a great asset for people living in underdeveloped areas. In this paper we propose a novel deep learning based dual-stream network with attention mechanism that uses both the raw heart sound signal and the MFCC features to detect abnormality in the heart condition of a patient. The deep neural network has a convolutional stream that uses the raw heart sound signal and a recurrent stream that uses the MFCC features of the signal. The features from these two streams are merged together using a novel attention network and passed through the classification network. The model is trained on the largest publicly available dataset of PCG signals and achieves an accuracy of 87.11, sensitivity of 82.41, specificity of 91.8 and a MACC of 87.12.
Submitted 17 November, 2022; originally announced November 2022.
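Note: a rough PyTorch sketch of the dual-stream idea described above, with a convolutional stream on the raw signal, a recurrent stream on MFCC frames, and an attention-based merge. Layer sizes, the GRU choice, and the softmax attention are illustrative assumptions rather than the paper's exact architecture.

```python
import torch
import torch.nn as nn

class DualStreamPCG(nn.Module):
    def __init__(self, n_mfcc=13, hidden=64, n_classes=2):
        super().__init__()
        self.cnn = nn.Sequential(                          # raw-signal stream
            nn.Conv1d(1, 16, 15, stride=4), nn.ReLU(),
            nn.Conv1d(16, hidden, 15, stride=4), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        )
        self.rnn = nn.GRU(n_mfcc, hidden, batch_first=True)  # MFCC stream
        self.attn = nn.Linear(hidden, 1)
        self.head = nn.Linear(hidden, n_classes)

    def forward(self, wave, mfcc):
        a = self.cnn(wave).squeeze(-1)                 # (B, hidden) from the raw signal
        _, h = self.rnn(mfcc)
        b = h[-1]                                      # (B, hidden) from MFCC frames
        feats = torch.stack([a, b], dim=1)             # (B, 2, hidden)
        w = torch.softmax(self.attn(feats), dim=1)     # attention weights over the two streams
        return self.head((w * feats).sum(dim=1))

model = DualStreamPCG()
logits = model(torch.randn(4, 1, 4000), torch.randn(4, 100, 13))
print(logits.shape)                                    # torch.Size([4, 2])
```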
11. arXiv:2211.00310 [pdf, other] (https://arxiv.org/abs/2211.00310)
Subjects: cs.LG
Title: SADT: Combining Sharpness-Aware Minimization with Self-Distillation for Improved Model Generalization
Authors: Masud An-Nur Islam Fahim, Jani Boutellier
Abstract: Methods for improving deep neural network training times and model generalizability consist of various data augmentation, regularization, and optimization approaches, which tend to be sensitive to hyperparameter settings and make reproducibility more challenging. This work jointly considers two recent training strategies that address model generalizability, sharpness-aware minimization and self-distillation, and proposes the novel training strategy of Sharpness-Aware Distilled Teachers (SADT). The experimental section of this work shows that SADT consistently outperforms previously published training strategies in model convergence time, test-time performance, and model generalizability over various neural architectures, datasets, and hyperparameter settings.
Submitted 1 November, 2022; originally announced November 2022.
Comments: Accepted to the "Has it Trained Yet?" Workshop at the Conference on Neural Information Processing Systems (NeurIPS 2022)
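Note: of the two ingredients named above, sharpness-aware minimization has a standard two-step update; a bare-bones sketch follows. How SADT couples this with self-distillation is only summarized in the abstract, so that part is not modeled here.

```python
import torch

def sam_step(model, loss_fn, x, y, optimizer, rho=0.05):
    params = [p for p in model.parameters() if p.requires_grad]
    # 1) ascent: perturb weights toward higher loss within an L2 ball of radius rho
    loss_fn(model(x), y).backward()
    grads = [p.grad.detach().clone() if p.grad is not None else torch.zeros_like(p)
             for p in params]
    norm = torch.sqrt(sum((g ** 2).sum() for g in grads)) + 1e-12
    with torch.no_grad():
        eps = [rho * g / norm for g in grads]
        for p, e in zip(params, eps):
            p.add_(e)
    optimizer.zero_grad()
    # 2) descent: compute gradients at the perturbed point, undo the perturbation, then step
    loss_fn(model(x), y).backward()
    with torch.no_grad():
        for p, e in zip(params, eps):
            p.sub_(e)
    optimizer.step()
    optimizer.zero_grad()
```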

arXiv:2208.01948 [pdf, other] (cs.CV, eess.IV)
Decay2Distill: Leveraging spatial perturbation and regularization for self-supervised image denoising
Authors: Manisha Das Chaity, Masud An Nur Islam Fahim
Abstract: Unpaired image denoising has achieved promising development over the last few years. Regardless of their performance, these methods tend to rely heavily on underlying noise properties or on assumptions that are not always practical. Alternatively, if we can ground the problem in a structural perspective rather than in noise statistics, we can achieve a more robust solution. With this motivation, we propose a self-supervised denoising scheme that is unpaired and relies on spatial degradation followed by a regularized refinement. Our method shows considerable improvement over previous methods and exhibits consistent performance across different data domains.
Submitted 4 August, 2022; v1 submitted 3 August, 2022; originally announced August 2022.

arXiv:2207.05374 [pdf, other] (cs.CV)
Rethinking gradient weights' influence over saliency map estimation
Authors: Masud An Nur Islam Fahim, Nazmus Saqib, Shafkat Khan Siam, Ho Yub Jung
Abstract: The class activation map (CAM) helps to formulate saliency maps that aid in interpreting the deep neural network's prediction. Gradient-based methods are generally faster than other branches of vision interpretability and independent of human guidance. The performance of CAM-like studies depends on the governing model's layer response and the influence of the gradients. Typical gradient-oriented CAM studies rely on weighted aggregation for saliency map estimation, projecting the gradient maps into single weight values, which may lead to an over-generalized saliency map. To address this issue, we use a global guidance map to rectify the weighted aggregation operation during saliency estimation, so that the resulting interpretations are comparatively cleaner and instance-specific. We obtain the global guidance map by performing element-wise multiplication between the feature maps and their corresponding gradient maps. To validate our study, we compare the proposed scheme with eight different saliency visualizers. In addition, we use seven commonly used evaluation metrics for quantitative comparison. The proposed scheme achieves significant improvement over the test images from the ImageNet, MS-COCO 14, and PASCAL VOC 2012 datasets.
Submitted 12 July, 2022; originally announced July 2022.
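
A rough PyTorch sketch of the idea, assuming feature maps and gradients of shape (1, C, H, W) captured from a target layer. The guidance-map construction (element-wise product of features and gradients) follows the abstract; the function name and the final way the guidance map rectifies the aggregated map are assumptions and may differ from the paper's exact formulation.

    import torch
    import torch.nn.functional as F

    def guided_cam(features, grads, out_size):
        """Gradient-weighted aggregation (as in Grad-CAM), rectified by a
        global guidance map built from feature * gradient products."""
        weights = grads.mean(dim=(2, 3), keepdim=True)           # per-channel weights
        cam = (weights * features).sum(dim=1, keepdim=True)      # weighted aggregation
        guidance = (features * grads).sum(dim=1, keepdim=True)   # global guidance map
        saliency = F.relu(cam * guidance)                        # assumed combination
        saliency = F.interpolate(saliency, size=out_size,
                                 mode="bilinear", align_corners=False)
        saliency = saliency - saliency.min()
        return saliency / (saliency.max() + 1e-12)

    heat = guided_cam(torch.rand(1, 256, 14, 14), torch.rand(1, 256, 14, 14), (224, 224))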

arXiv:2207.05176 [pdf, other] (cs.CV, cs.LG, eess.IV)
Denoising single images by feature ensemble revisited
Authors: Masud An Nur Islam Fahim, Nazmus Saqib, Shafkat Khan Siam, Ho Yub Jung
Abstract: Image denoising is still a challenging issue in many computer vision sub-domains. Recent studies show that significant improvements are made possible in a supervised setting. However, a few challenges, such as spatial fidelity and cartoon-like smoothing, remain unresolved or decisively overlooked. Our study proposes a simple yet efficient architecture for the denoising problem that addresses the aforementioned issues. The proposed architecture revisits the concept of modular concatenation, instead of long and deep cascaded connections, to recover a cleaner approximation of the given image. We find that different modules can capture versatile representations, and the concatenated representation creates a richer subspace for low-level image restoration. The proposed architecture has fewer parameters than most previous networks and still achieves significant improvements over the current state-of-the-art networks.
Submitted 11 July, 2022; originally announced July 2022.
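
As a loose illustration of "modular concatenation versus deep cascading", the sketch below runs a few shallow modules with different receptive fields in parallel and concatenates their features before a single fusion layer. The module design, widths, kernel sizes, and residual formulation are assumptions, not the paper's architecture.

    import torch
    import torch.nn as nn

    class ModularConcatDenoiser(nn.Module):
        """Parallel shallow modules, concatenated features, single fusion layer."""
        def __init__(self, ch=32):
            super().__init__()
            self.branches = nn.ModuleList([
                nn.Sequential(nn.Conv2d(3, ch, k, padding=k // 2), nn.ReLU(),
                              nn.Conv2d(ch, ch, k, padding=k // 2), nn.ReLU())
                for k in (3, 5, 7)                       # modules with varied receptive fields
            ])
            self.fuse = nn.Conv2d(3 * ch, 3, 3, padding=1)

        def forward(self, noisy):
            feats = torch.cat([b(noisy) for b in self.branches], dim=1)
            return noisy - self.fuse(feats)              # predict and subtract the noise

    clean_estimate = ModularConcatDenoiser()(torch.randn(1, 3, 64, 64))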

arXiv:2204.07546 [pdf, other] (cs.CV)
Semi-supervised atmospheric component learning in low-light image problem
Authors: Masud An Nur Islam Fahim, Nazmus Saqib, Jung Ho Yub
Abstract: Ambient lighting conditions play a crucial role in determining the perceptual quality of images from photographic devices. In general, inadequate transmission light and undesired atmospheric conditions jointly degrade the image quality. If we know the desired ambient factors associated with the given low-light image, we can recover the enhanced image easily \cite{b1}. Typical deep networks perform enhancement mappings without investigating the light distribution and color formulation properties. This leads to a lack of image instance-adaptive performance in practice. On the other hand, physical model-driven schemes suffer from the need for inherent decompositions and multiple objective minimizations. Moreover, the above approaches are rarely data-efficient or free of post-prediction tuning. Motivated by these issues, this study presents a semi-supervised training method using no-reference image quality metrics for low-light image restoration. We incorporate the classical haze distribution model \cite{b2} to explore the physical properties of the given image in order to learn the effect of atmospheric components and minimize a single objective for restoration. We validate the performance of our network on six widely used low-light datasets. The experiments show that the proposed study achieves state-of-the-art or comparable performance.
Submitted 15 April, 2022; originally announced April 2022.
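
The classical haze distribution model referenced above (\cite{b2}) is the usual atmospheric scattering equation. The snippet below simply writes it out and inverts it, assuming the transmission map and airlight are already known; estimating those components semi-supervisedly is the paper's contribution and is not reproduced here. The function names and toy values are illustrative.

    import numpy as np

    def apply_haze_model(J, t, A):
        """Classical haze/atmospheric scattering model:
        I(x) = J(x) * t(x) + A * (1 - t(x)),
        with J the scene radiance, t the transmission map, A the airlight."""
        return J * t + A * (1.0 - t)

    def invert_haze_model(I, t, A, t_min=0.1):
        """Recover the radiance once t and A are known (or estimated):
        J(x) = (I(x) - A) / max(t(x), t_min) + A."""
        return (I - A) / np.maximum(t, t_min) + A

    # Toy round-trip check on a random image with a constant transmission map
    J = np.random.rand(4, 4, 3)
    I = apply_haze_model(J, t=0.6, A=0.9)
    assert np.allclose(invert_haze_model(I, 0.6, 0.9), J)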

arXiv:2203.08760 [pdf] (eess.SY, cs.MA)
Incorporating Multi-Agent Systems Technology in Power and Energy Systems of Bangladesh: A Feasibility Study
Authors: Syed Redwan Md Hassan, Nazmul Hasan, Mohammad Ali Siddique, K. M Solaiman Fahim, Rummana Rahman, Lamia Iftekhar
Abstract: The power sector of Bangladesh is presently experiencing essential changes as demand for power services increases with a rising population and economic development. With a gradual shift from a rigidly centralized structure to a more decentralized and fluid setup, fundamentally because of the enormous advancement of distributed renewable energy sources, the future power system of the nation requires new control strategies to work efficiently and sustainably in the face of evolving conditions and constraints. Multi-Agent Systems (MAS) technology has attributes that meet these prerequisites of modern power systems and has been shown to be effective in dealing with their distributed and complex nature. This is a literature-based feasibility study exploring whether MAS technology is suited to the context of Bangladesh. For this preliminary paper, we look at the topic from a holistic perspective and conduct a meta-review to curate common applications of Multi-Agent System-based concepts, tools, and algorithms in the power and energy sector. We also identify the top challenges of this domain in Bangladesh and connect potential MAS-based solutions to each challenge. Our qualitative assessment is intended to provide a starting point for local researchers eager to experiment with MAS technology for application in Bangladesh.
Submitted 16 March, 2022; originally announced March 2022.

arXiv:2006.11168 [pdf, other] (cs.CV, cs.LG)
Emotion Recognition on large video dataset based on Convolutional Feature Extractor and Recurrent Neural Network
Authors: Denis Rangulov, Muhammad Fahim
Abstract: For many years, the emotion recognition task has remained one of the most interesting and important problems in the field of human-computer interaction. In this study, we consider the emotion recognition task as a classification as well as a regression task by processing encoded emotions in different datasets using deep learning models. Our model combines a convolutional neural network (CNN) with a recurrent neural network (RNN) to predict dimensional emotions on video data. In the first step, the CNN extracts feature vectors from video frames. In the second step, we feed these feature vectors to an RNN to exploit the temporal dynamics of the video. Furthermore, we analyze how each neural network contributes to the system's overall performance. The experiments are performed on publicly available datasets, including the largest modern Aff-Wild2 database, which contains over sixty hours of video data. We discovered the problem of model overfitting on an unbalanced dataset and illustrate it with confusion matrices. The problem is addressed by a downsampling technique that balances the dataset; by significantly decreasing the training data we balance the dataset and thereby improve the overall performance of the model. Hence, the study qualitatively describes the ability of deep learning models to predict facial emotions when given a sufficient amount of data. Our proposed method is implemented using TensorFlow Keras.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.11168v1-abstract-full').style.display = 'none'; document.getElementById('2006.11168v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 7 figures, Face and Gesture 2020 Workshop Paper (ABAW2020 competition)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.08326">arXiv:1903.08326</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1903.08326">pdf</a>, <a href="https://arxiv.org/format/1903.08326">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Numerically Stable Polynomially Coded Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fahim%2C+M">Mohammad Fahim</a>, <a href="/search/cs?searchtype=author&amp;query=Cadambe%2C+V+R">Viveck R. Cadambe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.08326v2-abstract-short" style="display: inline;"> We study the numerical stability of polynomial based encoding methods, which has emerged to be a powerful class of techniques for providing straggler and fault tolerance in the area of coded computing. Our contributions are as follows: 1) We construct new codes for matrix multiplication that achieve the same fault/straggler tolerance as the previously constructed MatDot Codes and Polynomial Codes.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.08326v2-abstract-full').style.display = 'inline'; document.getElementById('1903.08326v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.08326v2-abstract-full" style="display: none;"> We study the numerical stability of polynomial based encoding methods, which has emerged to be a powerful class of techniques for providing straggler and fault tolerance in the area of coded computing. Our contributions are as follows: 1) We construct new codes for matrix multiplication that achieve the same fault/straggler tolerance as the previously constructed MatDot Codes and Polynomial Codes. Unlike previous codes that use polynomials expanded in a monomial basis, our codes uses a basis of orthogonal polynomials. 2) We show that the condition number of every $m \times m$ sub-matrix of an $m \times n, n \geq m$ Chebyshev-Vandermonde matrix, evaluated on the $n$-point Chebyshev grid, grows as $O(n^{2(n-m)})$ for $n &gt; m$. An implication of this result is that, when Chebyshev-Vandermonde matrices are used for coded computing, for a fixed number of redundant nodes $s=n-m,$ the condition number grows at most polynomially in the number of nodes $n$. 

arXiv:1903.08326 [pdf, other] (cs.IT)
Numerically Stable Polynomially Coded Computing
Authors: Mohammad Fahim, Viveck R. Cadambe
Abstract: We study the numerical stability of polynomial-based encoding methods, which have emerged as a powerful class of techniques for providing straggler and fault tolerance in the area of coded computing. Our contributions are as follows: 1) We construct new codes for matrix multiplication that achieve the same fault/straggler tolerance as the previously constructed MatDot Codes and Polynomial Codes. Unlike previous codes that use polynomials expanded in a monomial basis, our codes use a basis of orthogonal polynomials. 2) We show that the condition number of every $m \times m$ sub-matrix of an $m \times n$, $n \geq m$, Chebyshev-Vandermonde matrix, evaluated on the $n$-point Chebyshev grid, grows as $O(n^{2(n-m)})$ for $n > m$. An implication of this result is that, when Chebyshev-Vandermonde matrices are used for coded computing, for a fixed number of redundant nodes $s = n - m$, the condition number grows at most polynomially in the number of nodes $n$. 3) By specializing our orthogonal polynomial based constructions to Chebyshev polynomials, and using our condition number bound for Chebyshev-Vandermonde matrices, we construct new numerically stable techniques for coded matrix multiplication. We empirically demonstrate that our constructions have significantly lower numerical errors compared to previous approaches, which involve inversion of Vandermonde matrices. We generalize our constructions to explore the trade-off between computation/communication and fault tolerance. 4) We propose a numerically stable specialization of Lagrange coded computing. Motivated by our condition number bound, our approach involves the choice of evaluation points and a suitable decoding procedure that involves inversion of an appropriate Chebyshev-Vandermonde matrix. Our approach is demonstrated empirically to have lower numerical errors compared to standard methods.
Submitted 22 May, 2019; v1 submitted 19 March, 2019; originally announced March 2019.
Comments: 31 pages, 13 figures, to be presented in part at the IEEE International Symposium on Information Theory (ISIT), July 2019
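
The role of the polynomial basis and the evaluation grid is easy to poke at numerically. The NumPy snippet below builds a monomial-basis and a Chebyshev-basis Vandermonde matrix on the same Chebyshev grid and compares the condition number of one particular m x m sub-matrix; the sizes and the choice of sub-matrix are arbitrary, and the printout is only a quick sanity check of the flavor of result 2), not a reproduction of the paper's bound.

    import numpy as np

    n, m = 20, 16
    # n-point Chebyshev grid: x_i = cos((2i + 1) * pi / (2n))
    x = np.cos((2 * np.arange(n) + 1) * np.pi / (2 * n))

    V_mono = np.vander(x, m, increasing=True)               # columns 1, x, ..., x^(m-1)
    V_cheb = np.polynomial.chebyshev.chebvander(x, m - 1)   # columns T_0(x), ..., T_(m-1)(x)

    rows = np.arange(m)                                     # one particular m x m sub-matrix
    print("monomial basis  cond:", np.linalg.cond(V_mono[rows]))
    print("Chebyshev basis cond:", np.linalg.cond(V_cheb[rows]))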

arXiv:1801.10292 [pdf, other] (cs.IT, cs.DC)
On the Optimal Recovery Threshold of Coded Matrix Multiplication
Authors: Sanghamitra Dutta, Mohammad Fahim, Farzin Haddadpour, Haewon Jeong, Viveck Cadambe, Pulkit Grover
Abstract: We provide novel coded computation strategies for distributed matrix-matrix products that outperform the recent "Polynomial code" constructions in recovery threshold, i.e., the required number of successful workers. When an $m$-th fraction of each matrix can be stored in each worker node, Polynomial codes require $m^2$ successful workers, while our MatDot codes only require $2m-1$ successful workers, albeit at a higher communication cost from each worker to the fusion node. We also provide a systematic construction of MatDot codes. Further, we propose "PolyDot" coding that interpolates between Polynomial codes and MatDot codes to trade off communication cost and recovery threshold. Finally, we demonstrate a coding technique for multiplying $n$ matrices ($n \geq 3$) by applying MatDot and PolyDot coding ideas.
Submitted 16 May, 2018; v1 submitted 30 January, 2018; originally announced January 2018.
Comments: Extended version of the paper that appeared at Allerton 2017 (October 2017), including full proofs and further results. Submitted to IEEE Transactions on Information Theory
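
The $2m-1$ recovery threshold of MatDot codes can be checked end-to-end on a toy example. The sketch below follows the construction implied by the abstract for $m = 2$: split A column-wise and B row-wise so that AB = A0 B0 + A1 B1, encode with degree-1 polynomials, and read AB off the middle coefficient of the degree-2 product. The matrix sizes and evaluation points are arbitrary choices for the demo.

    import numpy as np

    # MatDot with m = 2: worker k stores (A0 + A1*x_k, B0*x_k + B1) and
    # returns their product; the coefficient of x^1 in the degree-2
    # product polynomial is exactly A @ B, so any 2m - 1 = 3 results suffice.
    rng = np.random.default_rng(0)
    A, B = rng.standard_normal((4, 6)), rng.standard_normal((6, 4))
    A0, A1 = A[:, :3], A[:, 3:]
    B0, B1 = B[:3, :], B[3:, :]

    xs = np.array([1.0, 2.0, 3.0])                      # one evaluation point per worker
    worker_out = [(A0 + A1 * x) @ (B0 * x + B1) for x in xs]

    # Decode: interpolate the matrix-valued polynomial entry-wise and read
    # the x^1 coefficient (np.polyfit returns the highest degree first).
    stacked = np.stack(worker_out)                      # (3, 4, 4)
    coeffs = np.polyfit(xs, stacked.reshape(3, -1), deg=2)
    AB_hat = coeffs[1].reshape(4, 4)

    assert np.allclose(AB_hat, A @ B)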

arXiv:1705.02704 [pdf, other] (cs.IT)
Linear Network Coding for Two-Unicast-$Z$ Networks: A Commutative Algebraic Perspective and Fundamental Limits
Authors: Mohammad Fahim, Viveck Cadambe
Abstract: We consider a two-unicast-$Z$ network over a directed acyclic graph of unit-capacitated edges; the two-unicast-$Z$ network is a special case of two-unicast networks where one of the destinations has a priori side information of the unwanted (interfering) message. In this paper, we settle open questions on the limits of network coding for two-unicast-$Z$ networks by showing that the generalized network sharing bound is not tight, vector linear codes outperform scalar linear codes, and non-linear codes outperform linear codes in general. We also develop a commutative algebraic approach to deriving linear network coding achievability results, and demonstrate our approach by providing an alternate proof to the previous results of C. Wang et al., I. Wang et al., and Shenvi et al. regarding the feasibility of rate $(1,1)$ in the network.
Submitted 25 May, 2018; v1 submitted 7 May, 2017; originally announced May 2017.
Comments: A short version of this paper is published in the Proceedings of The IEEE International Symposium on Information Theory (ISIT), June 2017