<!-- CINXE.COM | Search | arXiv e-print repository -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <!-- MathJax is requested over explicit https, like every other static asset, so it also loads when the page is not served from an http(s) origin --> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <!-- the alt text is the accessible name of this link; an aria-label on the image would override it --> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 81 results for author: <span class="mathjax">Islam, M M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <!-- role="search" exposes this form as a search landmark; "aria-role" is not a valid attribute and is ignored by assistive technology --> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Islam%2C+M+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Islam, M M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Islam%2C+M+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Islam, M M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <!-- Pagination: the link for the page being viewed (class is-current) carries aria-current="page" and a "Page N" label; every other page link is labelled "Goto page N" --> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Islam%2C+M+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Islam%2C+M+M&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Islam%2C+M+M&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15876">arXiv:2411.15876</a> <span> [<a href="https://arxiv.org/pdf/2411.15876">pdf</a>, <a href="https://arxiv.org/format/2411.15876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Extensive Study on D2C: Overfitting Remediation in Deep Learning Using a Decentralized Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Siddiqui%2C+M+S+B">Md. 
Saiful Bari Siddiqui</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Alam%2C+M+G+R">Md. Golam Rabiul Alam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15876v1-abstract-short" style="display: inline;"> Overfitting remains a significant challenge in deep learning, often arising from data outliers, noise, and limited training data. To address this, we propose Divide2Conquer (D2C), a novel technique to mitigate overfitting. D2C partitions the training data into multiple subsets and trains identical models independently on each subset. To balance model generalization and subset-specific learning, th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15876v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15876v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15876v1-abstract-full" style="display: none;"> Overfitting remains a significant challenge in deep learning, often arising from data outliers, noise, and limited training data. To address this, we propose Divide2Conquer (D2C), a novel technique to mitigate overfitting. D2C partitions the training data into multiple subsets and trains identical models independently on each subset. To balance model generalization and subset-specific learning, the model parameters are periodically aggregated and averaged during training. This process enables the learning of robust patterns while minimizing the influence of outliers and noise. Empirical evaluations on benchmark datasets across diverse deep-learning tasks demonstrate that D2C significantly enhances generalization performance, particularly with larger datasets. 
Our analysis includes evaluations of decision boundaries, loss curves, and other performance metrics, highlighting D2C's effectiveness both as a standalone technique and in combination with other overfitting reduction methods. We further provide a rigorous mathematical justification for D2C's underlying principles and examine its applicability across multiple domains. Finally, we explore the trade-offs associated with D2C and propose strategies to address them, offering a holistic view of its strengths and limitations. This study establishes D2C as a versatile and effective approach to combating overfitting in deep learning. Our codes are publicly available at: https://github.com/Saiful185/Divide2Conquer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15876v1-abstract-full').style.display = 'none'; document.getElementById('2411.15876v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 Pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14901">arXiv:2411.14901</a> <span> [<a href="https://arxiv.org/pdf/2411.14901">pdf</a>, <a href="https://arxiv.org/format/2411.14901">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hannan%2C+T">Tanveer Hannan</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+J">Jindong Gu</a>, <a href="/search/cs?searchtype=author&query=Seidl%2C+T">Thomas Seidl</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14901v1-abstract-short" style="display: inline;"> Large language models (LLMs) excel at retrieving information from lengthy text, but their vision-language counterparts (VLMs) face difficulties with hour-long videos, especially for temporal grounding. Specifically, these VLMs are constrained by frame limitations, often losing essential temporal details needed for accurate event localization in extended video content. 
We propose ReVisionLLM, a rec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14901v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14901v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14901v1-abstract-full" style="display: none;"> Large language models (LLMs) excel at retrieving information from lengthy text, but their vision-language counterparts (VLMs) face difficulties with hour-long videos, especially for temporal grounding. Specifically, these VLMs are constrained by frame limitations, often losing essential temporal details needed for accurate event localization in extended video content. We propose ReVisionLLM, a recursive vision-language model designed to locate events in hour-long videos. Inspired by human search strategies, our model initially targets broad segments of interest, progressively revising its focus to pinpoint exact temporal boundaries. Our model can seamlessly handle videos of vastly different lengths, from minutes to hours. We also introduce a hierarchical training strategy that starts with short clips to capture distinct events and progressively extends to longer videos. To our knowledge, ReVisionLLM is the first VLM capable of temporal grounding in hour-long videos, outperforming previous state-of-the-art methods across multiple datasets by a significant margin (+2.6% R1@0.1 on MAD). The code is available at https://github.com/Tanveer81/ReVisionLLM. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14901v1-abstract-full').style.display = 'none'; document.getElementById('2411.14901v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06078">arXiv:2411.06078</a> <span> [<a href="https://arxiv.org/pdf/2411.06078">pdf</a>, <a href="https://arxiv.org/format/2411.06078">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Kolmogorov-Arnold Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Somvanshi%2C+S">Shriyank Somvanshi</a>, <a href="/search/cs?searchtype=author&query=Javed%2C+S+A">Syed Aaqib Javed</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Monzurul Islam</a>, <a href="/search/cs?searchtype=author&query=Pandit%2C+D">Diwas Pandit</a>, <a href="/search/cs?searchtype=author&query=Das%2C+S">Subasish Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06078v1-abstract-short" style="display: inline;"> This systematic review explores the theoretical foundations, evolution, applications, and future potential of Kolmogorov-Arnold Networks (KAN), a neural network model inspired by the Kolmogorov-Arnold representation theorem. 
KANs distinguish themselves from traditional neural networks by using learnable, spline-parameterized functions instead of fixed activation functions, allowing for flexible an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06078v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06078v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06078v1-abstract-full" style="display: none;"> This systematic review explores the theoretical foundations, evolution, applications, and future potential of Kolmogorov-Arnold Networks (KAN), a neural network model inspired by the Kolmogorov-Arnold representation theorem. KANs distinguish themselves from traditional neural networks by using learnable, spline-parameterized functions instead of fixed activation functions, allowing for flexible and interpretable representations of high-dimensional functions. This review details KAN's architectural strengths, including adaptive edge-based activation functions that improve parameter efficiency and scalability in applications such as time series forecasting, computational biomedicine, and graph learning. Key advancements, including Temporal-KAN, FastKAN, and Partial Differential Equation (PDE) KAN, illustrate KAN's growing applicability in dynamic environments, enhancing interpretability, computational efficiency, and adaptability for complex function approximation tasks. Additionally, this paper discusses KAN's integration with other architectures, such as convolutional, recurrent, and transformer-based models, showcasing its versatility in complementing established neural networks for tasks requiring hybrid approaches. Despite its strengths, KAN faces computational challenges in high-dimensional and noisy data settings, motivating ongoing research into optimization strategies, regularization techniques, and hybrid models. 
This paper highlights KAN's role in modern neural architectures and outlines future directions to improve its computational efficiency, interpretability, and scalability in data-intensive applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06078v1-abstract-full').style.display = 'none'; document.getElementById('2411.06078v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18052">arXiv:2410.18052</a> <span> [<a href="https://arxiv.org/pdf/2410.18052">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> In-Pixel Foreground and Contrast Enhancement Circuits with Customizable Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Udoy%2C+M+R+I">Md Rahatul Islam Udoy</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mazharul Islam</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+E">Elijah Johnson</a>, <a href="/search/cs?searchtype=author&query=Aziz%2C+A">Ahmedullah Aziz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18052v1-abstract-short" style="display: inline;"> This paper presents an innovative in-pixel contrast enhancement circuit that performs image processing directly within the pixel circuit. The circuit can be tuned for different modes of operation. 
In foreground enhancement mode, it suppresses low-intensity background pixels to nearly zero, isolating the foreground for better object visibility. In contrast enhancement mode, it improves overall imag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18052v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18052v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18052v1-abstract-full" style="display: none;"> This paper presents an innovative in-pixel contrast enhancement circuit that performs image processing directly within the pixel circuit. The circuit can be tuned for different modes of operation. In foreground enhancement mode, it suppresses low-intensity background pixels to nearly zero, isolating the foreground for better object visibility. In contrast enhancement mode, it improves overall image contrast. The contrast enhancement function is customizable both during the design phase and in real-time, allowing the circuit to adapt to specific applications and varying lighting conditions. A model of the designed pixel circuit is developed and applied to a full pixel array, demonstrating significant improvements in image quality. Simulations performed in HSPICE show a nearly 6x increase in Michelson Contrast Ratio (CR) in the foreground enhancement mode. The simulation results indicate its potential for real-time, adaptive contrast enhancement across various imaging environments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18052v1-abstract-full').style.display = 'none'; document.getElementById('2410.18052v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15017">arXiv:2410.15017</a> <span> [<a href="https://arxiv.org/pdf/2410.15017">pdf</a>, <a href="https://arxiv.org/format/2410.15017">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DM-Codec: Distilling Multimodal Representations for Speech Tokenization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahasan%2C+M+M">Md Mubtasim Ahasan</a>, <a href="/search/cs?searchtype=author&query=Fahim%2C+M">Md Fahim</a>, <a href="/search/cs?searchtype=author&query=Mohiuddin%2C+T">Tasnim Mohiuddin</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+A+K+M+M">A K M Mahbubur Rahman</a>, <a href="/search/cs?searchtype=author&query=Chadha%2C+A">Aman Chadha</a>, <a href="/search/cs?searchtype=author&query=Iqbal%2C+T">Tariq Iqbal</a>, <a href="/search/cs?searchtype=author&query=Amin%2C+M+A">M Ashraful Amin</a>, <a 
href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mofijul Islam</a>, <a href="/search/cs?searchtype=author&query=Ali%2C+A+A">Amin Ahsan Ali</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15017v1-abstract-short" style="display: inline;"> Recent advancements in speech-language models have yielded significant improvements in speech tokenization and synthesis. However, effectively mapping the complex, multidimensional attributes of speech into discrete tokens remains challenging. This process demands acoustic, semantic, and contextual information for precise speech representations. Existing speech representations generally fall into… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15017v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15017v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15017v1-abstract-full" style="display: none;"> Recent advancements in speech-language models have yielded significant improvements in speech tokenization and synthesis. However, effectively mapping the complex, multidimensional attributes of speech into discrete tokens remains challenging. This process demands acoustic, semantic, and contextual information for precise speech representations. Existing speech representations generally fall into two categories: acoustic tokens from audio codecs and semantic tokens from speech self-supervised learning models. Although recent efforts have unified acoustic and semantic tokens for improved performance, they overlook the crucial role of contextual representation in comprehensive speech modeling. 
Our empirical investigations reveal that the absence of contextual representations results in elevated Word Error Rate (WER) and Word Information Lost (WIL) scores in speech transcriptions. To address these limitations, we propose two novel distillation approaches: (1) a language model (LM)-guided distillation method that incorporates contextual information, and (2) a combined LM and self-supervised speech model (SM)-guided distillation technique that effectively distills multimodal representations (acoustic, semantic, and contextual) into a comprehensive speech tokenizer, termed DM-Codec. The DM-Codec architecture adopts a streamlined encoder-decoder framework with a Residual Vector Quantizer (RVQ) and incorporates the LM and SM during the training process. Experiments show DM-Codec significantly outperforms state-of-the-art speech tokenization models, reducing WER by up to 13.46%, WIL by 9.82%, and improving speech quality by 5.84% and intelligibility by 1.85% on the LibriSpeech benchmark dataset. The code, samples, and model checkpoints are available at https://github.com/mubtasimahasan/DM-Codec. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15017v1-abstract-full').style.display = 'none'; document.getElementById('2410.15017v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14536">arXiv:2410.14536</a> <span> [<a href="https://arxiv.org/pdf/2410.14536">pdf</a>, <a href="https://arxiv.org/format/2410.14536">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Hybrid Feature Fusion Deep Learning Framework for Leukemia Cancer Detection in Microscopic Blood Sample Using Gated Recurrent Unit and Uncertainty Quantification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Akter%2C+M">Maksuda Akter</a>, <a href="/search/cs?searchtype=author&query=Khatun%2C+R">Rabea Khatun</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manowarul Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14536v1-abstract-short" style="display: inline;"> Acute lymphoblastic leukemia (ALL) is the most malignant form of leukemia and the most common cancer in adults and children. Traditionally, leukemia is diagnosed by analyzing blood and bone marrow smears under a microscope, with additional cytochemical tests for confirmation. However, these methods are expensive, time consuming, and highly dependent on expert knowledge. 
In recent years, deep learn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14536v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14536v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14536v1-abstract-full" style="display: none;"> Acute lymphoblastic leukemia (ALL) is the most malignant form of leukemia and the most common cancer in adults and children. Traditionally, leukemia is diagnosed by analyzing blood and bone marrow smears under a microscope, with additional cytochemical tests for confirmation. However, these methods are expensive, time consuming, and highly dependent on expert knowledge. In recent years, deep learning, particularly Convolutional Neural Networks (CNNs), has provided advanced methods for classifying microscopic smear images, aiding in the detection of leukemic cells. These approaches are quick, cost effective, and not subject to human bias. However, most methods lack the ability to quantify uncertainty, which could lead to critical misdiagnoses. In this research, hybrid deep learning models (InceptionV3-GRU, EfficientNetB3-GRU, MobileNetV2-GRU) were implemented to classify ALL. Bayesian optimization was used to fine tune the model's hyperparameters and improve its performance. Additionally, Deep Ensemble uncertainty quantification was applied to address uncertainty during leukemia image classification. The proposed models were trained on the publicly available datasets ALL-IDB1 and ALL-IDB2. Their results were then aggregated at the score level using the sum rule. The parallel architecture used in these models offers a high level of confidence in differentiating between ALL and non-ALL cases. 
The proposed method achieved a remarkable detection accuracy rate of 100% on the ALL-IDB1 dataset, 98.07% on the ALL-IDB2 dataset, and 98.64% on the combined dataset, demonstrating its potential for accurate and reliable leukemia diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14536v1-abstract-full').style.display = 'none'; document.getElementById('2410.14536v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14489">arXiv:2410.14489</a> <span> [<a href="https://arxiv.org/pdf/2410.14489">pdf</a>, <a href="https://arxiv.org/format/2410.14489">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Integrated Deep Learning Model for Skin Cancer Detection Using Hybrid Feature Fusion Technique </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Akter%2C+M">Maksuda Akter</a>, <a href="/search/cs?searchtype=author&query=Khatun%2C+R">Rabea Khatun</a>, <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md. 
Ashraf Uddin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14489v2-abstract-short" style="display: inline;"> Skin cancer is a serious and potentially fatal disease caused by DNA damage. Early detection significantly increases survival rates, making accurate diagnosis crucial. In this groundbreaking study, we present a hybrid framework based on Deep Learning (DL) that achieves precise classification of benign and malignant skin lesions. Our approach begins with dataset preprocessing to enhance classificat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14489v2-abstract-full').style.display = 'inline'; document.getElementById('2410.14489v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14489v2-abstract-full" style="display: none;"> Skin cancer is a serious and potentially fatal disease caused by DNA damage. Early detection significantly increases survival rates, making accurate diagnosis crucial. In this groundbreaking study, we present a hybrid framework based on Deep Learning (DL) that achieves precise classification of benign and malignant skin lesions. Our approach begins with dataset preprocessing to enhance classification accuracy, followed by training two separate pre-trained DL models, InceptionV3 and DenseNet121. By fusing the results of each model using the weighted sum rule, our system achieves exceptional accuracy rates. Specifically, we achieve a 92.27% detection accuracy rate, 92.33% sensitivity, 92.22% specificity, 90.81% precision, and 91.57% F1-score, outperforming existing models and demonstrating the robustness and trustworthiness of our hybrid approach. Our study represents a significant advance in skin cancer diagnosis and provides a promising foundation for further research in the field. 
With the potential to save countless lives through earlier detection, our hybrid deep-learning approach is a game-changer in the fight against skin cancer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14489v2-abstract-full').style.display = 'none'; document.getElementById('2410.14489v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14433">arXiv:2410.14433</a> <span> [<a href="https://arxiv.org/pdf/2410.14433">pdf</a>, <a href="https://arxiv.org/format/2410.14433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Bioinformatic Approach Validated Utilizing Machine Learning Algorithms to Identify Relevant Biomarkers and Crucial Pathways in Gallbladder Cancer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khatun%2C+R">Rabea Khatun</a>, <a href="/search/cs?searchtype=author&query=Tasnim%2C+W">Wahia Tasnim</a>, <a href="/search/cs?searchtype=author&query=Akter%2C+M">Maksuda Akter</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md. Ashraf Uddin</a>, <a href="/search/cs?searchtype=author&query=Mahmud%2C+M+Z">Md. 
Zulfiker Mahmud</a>, <a href="/search/cs?searchtype=author&query=Das%2C+S+C">Saurav Chandra Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14433v1-abstract-short" style="display: inline;"> Gallbladder cancer (GBC) is the most frequent cause of disease among biliary tract neoplasms. Identifying the molecular mechanisms and biomarkers linked to GBC progression has been a significant challenge in scientific research. Few recent studies have explored the roles of biomarkers in GBC. Our study aimed to identify biomarkers in GBC using machine learning (ML) and bioinformatics techniques. W… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14433v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14433v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14433v1-abstract-full" style="display: none;"> Gallbladder cancer (GBC) is the most frequent cause of disease among biliary tract neoplasms. Identifying the molecular mechanisms and biomarkers linked to GBC progression has been a significant challenge in scientific research. Few recent studies have explored the roles of biomarkers in GBC. Our study aimed to identify biomarkers in GBC using machine learning (ML) and bioinformatics techniques. We compared GBC tumor samples with normal samples to identify differentially expressed genes (DEGs) from two microarray datasets (GSE100363, GSE139682) obtained from the NCBI GEO database. A total of 146 DEGs were found, with 39 up-regulated and 107 down-regulated genes. Functional enrichment analysis of these DEGs was performed using Gene Ontology (GO) terms and REACTOME pathways through DAVID. The protein-protein interaction network was constructed using the STRING database. 
To identify hub genes, we applied three ranking algorithms: Degree, MNC, and Closeness Centrality. The intersection of hub genes from these algorithms yielded 11 hub genes. Simultaneously, two feature selection methods (Pearson correlation and recursive feature elimination) were used to identify significant gene subsets. We then developed ML models using SVM and RF on the GSE100363 dataset, with validation on GSE139682, to determine the gene subset that best distinguishes GBC samples. The hub genes outperformed the other gene subsets. Finally, NTRK2, COL14A1, SCN4B, ATP1A2, SLC17A7, SLIT3, COL7A1, CLDN4, CLEC3B, ADCYAP1R1, and MFAP4 were identified as crucial genes, with SLIT3, COL7A1, and CLDN4 being strongly linked to GBC development and prediction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14433v1-abstract-full').style.display = 'none'; document.getElementById('2410.14433v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05041">arXiv:2410.05041</a> <span> [<a href="https://arxiv.org/pdf/2410.05041">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Systematic Literature Review of Vision-Based Approaches to Outdoor Livestock Monitoring with Lessons from Wildlife Studies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Scott%2C+S+D">Stacey D. Scott</a>, <a href="/search/cs?searchtype=author&query=Abbas%2C+Z+J">Zayn J. Abbas</a>, <a href="/search/cs?searchtype=author&query=Ellid%2C+F">Feerass Ellid</a>, <a href="/search/cs?searchtype=author&query=Dykhne%2C+E">Eli-Henry Dykhne</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Muhammad Muhaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Ayad%2C+W">Weam Ayad</a>, <a href="/search/cs?searchtype=author&query=Kacmorova%2C+K">Kristina Kacmorova</a>, <a href="/search/cs?searchtype=author&query=Tulpan%2C+D">Dan Tulpan</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+M">Minglun Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05041v1-abstract-short" style="display: inline;"> Precision livestock farming (PLF) aims to improve the health and welfare of livestock animals and farming outcomes through the use of advanced technologies. 
Computer vision, combined with recent advances in machine learning and deep learning artificial intelligence approaches, offers a possible solution to the PLF ideal of 24/7 livestock monitoring that helps facilitate early detection of animal h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05041v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05041v1-abstract-full" style="display: none;"> Precision livestock farming (PLF) aims to improve the health and welfare of livestock animals and farming outcomes through the use of advanced technologies. Computer vision, combined with recent advances in machine learning and deep learning artificial intelligence approaches, offers a possible solution to the PLF ideal of 24/7 livestock monitoring that helps facilitate early detection of animal health and welfare issues. However, a significant number of livestock species are raised in large outdoor habitats that pose technological challenges for computer vision approaches. This review provides a comprehensive overview of computer vision methods and open challenges in outdoor animal monitoring. We include research from both the livestock and wildlife fields in the review because of the similarities in appearance, behaviour, and habitat for many livestock and wildlife. We focus on large terrestrial mammals, such as cattle, horses, deer, goats, sheep, koalas, giraffes, and elephants. We use an image processing pipeline to frame our discussion and highlight the current capabilities and open technical challenges at each stage of the pipeline. The review found a clear trend towards the use of deep learning approaches for animal detection, counting, and multi-species classification. 
We discuss in detail the applicability of current vision-based methods to PLF contexts and promising directions for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05041v1-abstract-full').style.display = 'none'; document.getElementById('2410.05041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 5 figures, 2 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> CSL-2024-01 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.10; I.2.6; J.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.20557">arXiv:2409.20557</a> <span> [<a href="https://arxiv.org/pdf/2409.20557">pdf</a>, <a href="https://arxiv.org/format/2409.20557">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Propose, Assess, Search: Harnessing LLMs for Goal-Oriented Planning in Instructional Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Nagarajan%2C+T">Tushar Nagarajan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Huiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+F">Fu-Jen Chu</a>, <a 
href="/search/cs?searchtype=author&query=Kitani%2C+K">Kris Kitani</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xitong Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.20557v1-abstract-short" style="display: inline;"> Goal-oriented planning, or anticipating a series of actions that transition an agent from its current state to a predefined objective, is crucial for developing intelligent assistants aiding users in daily procedural tasks. The problem presents significant challenges due to the need for comprehensive knowledge of temporal and hierarchical task structures, as well as strong capabilities in reasonin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20557v1-abstract-full').style.display = 'inline'; document.getElementById('2409.20557v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.20557v1-abstract-full" style="display: none;"> Goal-oriented planning, or anticipating a series of actions that transition an agent from its current state to a predefined objective, is crucial for developing intelligent assistants aiding users in daily procedural tasks. The problem presents significant challenges due to the need for comprehensive knowledge of temporal and hierarchical task structures, as well as strong capabilities in reasoning and planning. To achieve this, prior work typically relies on extensive training on the target dataset, which often results in significant dataset bias and a lack of generalization to unseen tasks. In this work, we introduce VidAssist, an integrated framework designed for zero/few-shot goal-oriented planning in instructional videos. 
VidAssist leverages large language models (LLMs) as both the knowledge base and the assessment tool for generating and evaluating action plans, thus overcoming the challenges of acquiring procedural knowledge from small-scale, low-diversity datasets. Moreover, VidAssist employs a breadth-first search algorithm for optimal plan generation, in which a composite of value functions designed for goal-oriented planning is utilized to assess the predicted actions at each step. Extensive experiments demonstrate that VidAssist offers a unified framework for different goal-oriented planning setups, e.g., visual planning for assistance (VPA) and procedural planning (PP), and achieves remarkable performance in zero-shot and few-shot setups. Specifically, our few-shot model outperforms the prior fully supervised state-of-the-art method by +7.7% in VPA and +4.81% in the PP task on the COIN dataset while predicting 4 future actions. Code and models are publicly available at https://sites.google.com/view/vidassist. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20557v1-abstract-full').style.display = 'none'; document.getElementById('2409.20557v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECCV 2024 (Oral)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12328">arXiv:2409.12328</a> <span> [<a href="https://arxiv.org/pdf/2409.12328">pdf</a>, <a href="https://arxiv.org/format/2409.12328">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> SplitVAEs: Decentralized scenario generation from siloed data for stochastic optimization problems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+H+M+M">H M Mohaimanul Islam</a>, <a href="/search/cs?searchtype=author&query=Vo%2C+H+Q+N">Huynh Q. N. Vo</a>, <a href="/search/cs?searchtype=author&query=Ramanan%2C+P">Paritosh Ramanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12328v1-abstract-short" style="display: inline;"> Stochastic optimization problems in large-scale multi-stakeholder networked systems (e.g., power grids and supply chains) rely on data-driven scenarios to encapsulate complex spatiotemporal interdependencies. However, centralized aggregation of stakeholder data is challenging due to the existence of data silos resulting from computational and logistical bottlenecks. 
In this paper, we present Split… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12328v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12328v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12328v1-abstract-full" style="display: none;"> Stochastic optimization problems in large-scale multi-stakeholder networked systems (e.g., power grids and supply chains) rely on data-driven scenarios to encapsulate complex spatiotemporal interdependencies. However, centralized aggregation of stakeholder data is challenging due to the existence of data silos resulting from computational and logistical bottlenecks. In this paper, we present SplitVAEs, a decentralized scenario generation framework that leverages variational autoencoders to generate high-quality scenarios without moving stakeholder data. With the help of experiments on distributed memory systems, we demonstrate the broad applicability of SplitVAEs in a variety of domain areas that are dominated by a large number of stakeholders. Our experiments indicate that SplitVAEs can learn spatial and temporal interdependencies in large-scale networks to generate scenarios that match the joint historical distribution of stakeholder data in a decentralized manner. Our experiments show that SplitVAEs deliver robust performance compared to centralized, state-of-the-art benchmark methods while significantly reducing data transmission costs, leading to a scalable, privacy-enhancing alternative to scenario generation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12328v1-abstract-full').style.display = 'none'; document.getElementById('2409.12328v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01028">arXiv:2408.01028</a> <span> [<a href="https://arxiv.org/pdf/2408.01028">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Harnessing Ferro-Valleytricity in Penta-Layer Rhombohedral Graphene for Memory and Compute </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mazharul Islam</a>, <a href="/search/cs?searchtype=author&query=Alam%2C+S">Shamiul Alam</a>, <a href="/search/cs?searchtype=author&query=Udoy%2C+M+R+I">Md Rahatul Islam Udoy</a>, <a href="/search/cs?searchtype=author&query=Hossain%2C+M+S">Md Shafayat Hossain</a>, <a href="/search/cs?searchtype=author&query=Hamilton%2C+K+E">Kathleen E Hamilton</a>, <a href="/search/cs?searchtype=author&query=Aziz%2C+A">Ahmedullah Aziz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 
<span class="abstract-short has-text-grey-dark mathjax" id="2408.01028v1-abstract-short" style="display: inline;"> Two-dimensional materials with multiple degrees of freedom, including spin, valleys, and orbitals, open up an exciting avenue for engineering multifunctional devices. Beyond spintronics, these degrees of freedom can lead to novel quantum effects such as valley-dependent Hall effects and orbital magnetism, which could revolutionize next-generation electronics. However, achieving independent control… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01028v1-abstract-full').style.display = 'inline'; document.getElementById('2408.01028v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.01028v1-abstract-full" style="display: none;"> Two-dimensional materials with multiple degrees of freedom, including spin, valleys, and orbitals, open up an exciting avenue for engineering multifunctional devices. Beyond spintronics, these degrees of freedom can lead to novel quantum effects such as valley-dependent Hall effects and orbital magnetism, which could revolutionize next-generation electronics. However, achieving independent control over valley polarization and orbital magnetism has been a challenge due to the need for large electric fields. A recent breakthrough involving penta-layer rhombohedral graphene has demonstrated the ability to individually manipulate anomalous Hall signals and orbital magnetic hysteresis, forming what is known as a valley-magnetic quartet. Here, we leverage the electrically tunable Ferro-valleytricity of penta-layer rhombohedral graphene to develop non-volatile memory and in-memory computation applications. We propose an architecture for a dense, scalable, and selector-less non-volatile memory array that harnesses the electrically tunable ferro-valleytricity. 
In our designed array architecture, non-destructive read and write operations are conducted by sensing the valley state through two different pairs of terminals, allowing for independent optimization of read/write peripheral circuits. The power consumption of our PRG-based array is remarkably low, with only ~ 6 nW required per write operation and ~ 2.3 nW per read operation per cell. This consumption is orders of magnitude lower than that of the majority of state-of-the-art cryogenic memories. Additionally, we engineer in-memory computation by implementing majority logic operations within our proposed non-volatile memory array without modifying the peripheral circuitry. Our framework presents a promising pathway toward achieving ultra-dense cryogenic memory and in-memory computation capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01028v1-abstract-full').style.display = 'none'; document.getElementById('2408.01028v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00029">arXiv:2408.00029</a> <span> [<a href="https://arxiv.org/pdf/2408.00029">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> A New Horizon of Data Communication through Quantum Entanglement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+S+M+R">S. M. Rashadul Islam</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manirul Islam</a>, <a href="/search/cs?searchtype=author&query=Salsabil%2C+U">Umme Salsabil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00029v1-abstract-short" style="display: inline;"> By the blessing of our existing data communication system, we can communicate or share our information with each other in every nook and corner of the world within some few seconds but there are some limitations in our traditional data communication system. Every day we are trying to overcome these limitations and improve our systems for better performance. 
Among them some problems may not be reso… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00029v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00029v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00029v1-abstract-full" style="display: none;"> By the blessing of our existing data communication system, we can communicate or share our information with each other in every nook and corner of the world within some few seconds but there are some limitations in our traditional data communication system. Every day we are trying to overcome these limitations and improve our systems for better performance. Among them some problems may not be resolvable, for the reason of very basic or root dependencies of physics. In this paper, we have clarified some main drawbacks in our traditional communication system and provided a conceptual model to overcome these issues by using mystic Quantum Entanglement theorem rather than classical or modern physics phenomenon. In the end, we introduced a possible Quantum circuit diagram and Quantum network architecture for end-to-end data communication. It is predicted that through this hypothetical model data can be transmitted faster than light and it will be 100% real time between any distances without any kinds of traditional communication medium that are being used to date. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00029v1-abstract-full').style.display = 'none'; document.getElementById('2408.00029v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08862">arXiv:2406.08862</a> <span> [<a href="https://arxiv.org/pdf/2406.08862">pdf</a>, <a href="https://arxiv.org/format/2406.08862">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cognitively Inspired Energy-Based World Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gladstone%2C+A">Alexi Gladstone</a>, <a href="/search/cs?searchtype=author&query=Nanduru%2C+G">Ganesh Nanduru</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mofijul Islam</a>, <a href="/search/cs?searchtype=author&query=Chadha%2C+A">Aman Chadha</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jundong Li</a>, <a href="/search/cs?searchtype=author&query=Iqbal%2C+T">Tariq Iqbal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08862v1-abstract-short" style="display: inline;"> One of the predominant methods for training world models is autoregressive prediction in the output space of the next element of a sequence. In Natural Language Processing (NLP), this takes the form of Large Language Models (LLMs) predicting the next token; in Computer Vision (CV), this takes the form of autoregressive models predicting the next frame/token/pixel. 
However, this approach differs fr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08862v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08862v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08862v1-abstract-full" style="display: none;"> One of the predominant methods for training world models is autoregressive prediction in the output space of the next element of a sequence. In Natural Language Processing (NLP), this takes the form of Large Language Models (LLMs) predicting the next token; in Computer Vision (CV), this takes the form of autoregressive models predicting the next frame/token/pixel. However, this approach differs from human cognition in several respects. First, human predictions about the future actively influence internal cognitive processes. Second, humans naturally evaluate the plausibility of predictions regarding future states. Based on this capability, and third, by assessing when predictions are sufficient, humans allocate a dynamic amount of time to make a prediction. This adaptive process is analogous to System 2 thinking in psychology. All these capabilities are fundamental to the success of humans at high-level reasoning and planning. Therefore, to address the limitations of traditional autoregressive models lacking these human-like capabilities, we introduce Energy-Based World Models (EBWM). EBWM involves training an Energy-Based Model (EBM) to predict the compatibility of a given context and a predicted future state. In doing so, EBWM enables models to achieve all three facets of human cognition described. Moreover, we developed a variant of the traditional autoregressive transformer tailored for Energy-Based models, termed the Energy-Based Transformer (EBT). 
Our results demonstrate that EBWM scales better with data and GPU Hours than traditional autoregressive transformers in CV, and that EBWM offers promising early scaling in NLP. Consequently, this approach offers an exciting path toward training future models capable of System 2 thinking and intelligently searching across state spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08862v1-abstract-full').style.display = 'none'; document.getElementById('2406.08862v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02450">arXiv:2406.02450</a> <span> [<a href="https://arxiv.org/pdf/2406.02450">pdf</a>, <a href="https://arxiv.org/format/2406.02450">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Generalized Apprenticeship Learning Framework for Modeling Heterogeneous Student Pedagogical Strategies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mirajul Islam</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xi Yang</a>, <a href="/search/cs?searchtype=author&query=Hostetter%2C+J">John Hostetter</a>, <a 
href="/search/cs?searchtype=author&query=Saha%2C+A+S">Adittya Soukarjya Saha</a>, <a href="/search/cs?searchtype=author&query=Chi%2C+M">Min Chi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02450v1-abstract-short" style="display: inline;"> A key challenge in e-learning environments like Intelligent Tutoring Systems (ITSs) is to induce effective pedagogical policies efficiently. While Deep Reinforcement Learning (DRL) often suffers from sample inefficiency and reward function design difficulty, Apprenticeship Learning (AL) algorithms can overcome them. However, most AL algorithms cannot handle heterogeneity as they assume all demonst… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02450v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02450v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02450v1-abstract-full" style="display: none;"> A key challenge in e-learning environments like Intelligent Tutoring Systems (ITSs) is to induce effective pedagogical policies efficiently. While Deep Reinforcement Learning (DRL) often suffers from sample inefficiency and reward function design difficulty, Apprenticeship Learning (AL) algorithms can overcome them. However, most AL algorithms cannot handle heterogeneity as they assume all demonstrations are generated with a homogeneous policy driven by a single reward function. Still, some AL algorithms that consider heterogeneity often cannot generalize to large continuous state spaces and only work with discrete states. In this paper, we propose an expectation-maximization(EM)-EDM, a general AL framework to induce effective pedagogical policies from given optimal or near-optimal demonstrations, which are assumed to be driven by heterogeneous reward functions. 
We compare the effectiveness of the policies induced by our proposed EM-EDM against four AL-based baselines and two policies induced by DRL on two different but related tasks that involve pedagogical action prediction. Our overall results showed that, for both tasks, EM-EDM outperforms the four AL baselines across all performance metrics and the two DRL baselines. This suggests that EM-EDM can effectively model complex student pedagogical decision-making processes through the ability to manage a large, continuous state space and adapt to handle diverse and heterogeneous reward functions with very few given demonstrations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02450v1-abstract-full').style.display = 'none'; document.getElementById('2406.02450v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06667">arXiv:2405.06667</a> <span> [<a href="https://arxiv.org/pdf/2405.06667">pdf</a>, <a href="https://arxiv.org/format/2405.06667">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Sentiment Polarity Analysis of Bangla Food Reviews Using Machine and Deep Learning Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Amin%2C+A">Al Amin</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+A">Anik Sarkar</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mahamodul Islam</a>, <a href="/search/cs?searchtype=author&query=Miazee%2C+A+A">Asif Ahammad Miazee</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+R">Md Robiul Islam</a>, <a href="/search/cs?searchtype=author&query=Hoque%2C+M+M">Md Mahmudul Hoque</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06667v1-abstract-short" style="display: inline;"> The Internet has become an essential tool for people in the modern world. Humans, like all living organisms, have essential requirements for survival. These include access to atmospheric oxygen, potable water, protective shelter, and sustenance. The constant flux of the world is making our existence less complicated. 
A significant portion of the population utilizes online food ordering services to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06667v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06667v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06667v1-abstract-full" style="display: none;"> The Internet has become an essential tool for people in the modern world. Humans, like all living organisms, have essential requirements for survival. These include access to atmospheric oxygen, potable water, protective shelter, and sustenance. The constant flux of the world is making our existence less complicated. A significant portion of the population utilizes online food ordering services to have meals delivered to their residences. Although there are numerous methods for ordering food, customers sometimes experience disappointment with the food they receive. Our endeavor was to establish a model that could determine if food is of good or poor quality. We compiled an extensive dataset of over 1484 online reviews from prominent food ordering platforms, including Food Panda and HungryNaki. Leveraging the collected data, a rigorous assessment of various deep learning and machine learning techniques was performed to determine the most accurate approach for predicting food quality. Out of all the algorithms evaluated, logistic regression emerged as the most accurate, achieving an impressive 90.91% accuracy. The review offers valuable insights that will guide the user in deciding whether or not to order the food. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06667v1-abstract-full').style.display = 'none'; document.getElementById('2405.06667v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17960">arXiv:2404.17960</a> <span> [<a href="https://arxiv.org/pdf/2404.17960">pdf</a>, <a href="https://arxiv.org/format/2404.17960">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PhishGuard: A Convolutional Neural Network Based Model for Detecting Phishing URLs with Explainability Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+R">Md Robiul Islam</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mahamodul Islam</a>, <a href="/search/cs?searchtype=author&query=Afrin%2C+M+S">Mst. 
Suraiya Afrin</a>, <a href="/search/cs?searchtype=author&query=Antara%2C+A">Anika Antara</a>, <a href="/search/cs?searchtype=author&query=Tabassum%2C+N">Nujhat Tabassum</a>, <a href="/search/cs?searchtype=author&query=Amin%2C+A">Al Amin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17960v1-abstract-short" style="display: inline;"> Cybersecurity is one of the global issues because of the extensive dependence on cyber systems of individuals, industries, and organizations. Among the cyber attacks, phishing is increasing tremendously and affecting the global economy. Therefore, this phenomenon highlights the vital need for enhancing user awareness and robust support at both individual and organizational levels. Phishing URL ide… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17960v1-abstract-full').style.display = 'inline'; document.getElementById('2404.17960v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17960v1-abstract-full" style="display: none;"> Cybersecurity is one of the global issues because of the extensive dependence on cyber systems of individuals, industries, and organizations. Among the cyber attacks, phishing is increasing tremendously and affecting the global economy. Therefore, this phenomenon highlights the vital need for enhancing user awareness and robust support at both individual and organizational levels. Phishing URL identification is the best way to address the problem. Various machine learning and deep learning methods have been proposed to automate the detection of phishing URLs. However, these approaches often need more convincing accuracy and rely on datasets consisting of limited samples. 
Furthermore, these black-box intelligent models' decision to detect suspicious URLs needs proper explanation to understand the features affecting the output. To address the issues, we propose a 1D Convolutional Neural Network (CNN) and train the model with extensive features and a substantial amount of data. The proposed model outperforms existing works by attaining an accuracy of 99.85%. Additionally, our explainability analysis highlights certain features that significantly contribute to identifying the phishing URL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17960v1-abstract-full').style.display = 'none'; document.getElementById('2404.17960v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.03606">arXiv:2404.03606</a> <span> [<a href="https://arxiv.org/pdf/2404.03606">pdf</a>, <a href="https://arxiv.org/format/2404.03606">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Analyzing Musical Characteristics of National 
Anthems in Relation to Global Indices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hasan%2C+S+M+R">S M Rakib Hasan</a>, <a href="/search/cs?searchtype=author&query=Dhakal%2C+A">Aakar Dhakal</a>, <a href="/search/cs?searchtype=author&query=Siddiqua%2C+M+A">Ms. Ayesha Siddiqua</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+M+M">Mohammad Mominur Rahman</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Maidul Islam</a>, <a href="/search/cs?searchtype=author&query=Chowdhury%2C+M+A+R">Mohammed Arfat Raihan Chowdhury</a>, <a href="/search/cs?searchtype=author&query=Swapno%2C+S+M+M+R">S M Masfequier Rahman Swapno</a>, <a href="/search/cs?searchtype=author&query=Nobel%2C+S+N">SM Nuruzzaman Nobel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.03606v1-abstract-short" style="display: inline;"> Music plays a huge part in shaping people's psychology and behavioral patterns. This paper investigates the connection between national anthems and different global indices with computational music analysis and statistical correlation analysis. We analyze national anthem musical data to determine whether certain musical characteristics are associated with peace, happiness, suicide rate, crime rate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03606v1-abstract-full').style.display = 'inline'; document.getElementById('2404.03606v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.03606v1-abstract-full" style="display: none;"> Music plays a huge part in shaping people's psychology and behavioral patterns. 
This paper investigates the connection between national anthems and different global indices with computational music analysis and statistical correlation analysis. We analyze national anthem musical data to determine whether certain musical characteristics are associated with peace, happiness, suicide rate, crime rate, etc. To achieve this, we collect national anthems from 169 countries and use computational music analysis techniques to extract pitch, tempo, beat, and other pertinent audio features. We then compare these musical characteristics with data on different global indices to ascertain whether a significant correlation exists. Our findings indicate that there may be a correlation between the musical characteristics of national anthems and the indices we investigated. The implications of our findings for music psychology and policymakers interested in promoting social well-being are discussed. This paper emphasizes the potential of musical data analysis in social research and offers a novel perspective on the relationship between music and social indices. The source code and data are made open-access for reproducibility and future research endeavors. It can be accessed at http://bit.ly/na_code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03606v1-abstract-full').style.display = 'none'; document.getElementById('2404.03606v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18949">arXiv:2403.18949</a> <span> [<a href="https://arxiv.org/pdf/2403.18949">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Other Computer Science">cs.OH</span> </div> </div> <p class="title is-5 mathjax"> An IoT Based Water-Logging Detection System: A Case Study of Dhaka </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manirul Islam</a>, <a href="/search/cs?searchtype=author&query=Mahamud%2C+M+S">Md. Sadad Mahamud</a>, <a href="/search/cs?searchtype=author&query=Salsabil%2C+U">Umme Salsabil</a>, <a href="/search/cs?searchtype=author&query=Amin%2C+A+A+M+M">A. A. M. Mazharul Amin</a>, <a href="/search/cs?searchtype=author&query=Suman%2C+S+H">Samiul Haque Suman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18949v1-abstract-short" style="display: inline;"> With a large number of populations, many problems are rising rapidly in Dhaka, the capital city of Bangladesh. Water-logging is one of the major issues among them. Heavy rainfall, lack of awareness and poor maintenance causes bad sewerage system in the city. As a result, water is overflowed on the roads and sometimes it gets mixed with the drinking water. 
To overcome this problem, this paper reali… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18949v1-abstract-full').style.display = 'inline'; document.getElementById('2403.18949v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18949v1-abstract-full" style="display: none;"> With a large number of populations, many problems are rising rapidly in Dhaka, the capital city of Bangladesh. Water-logging is one of the major issues among them. Heavy rainfall, lack of awareness and poor maintenance causes bad sewerage system in the city. As a result, water is overflowed on the roads and sometimes it gets mixed with the drinking water. To overcome this problem, this paper realizes the potential of using Internet of Things to combat water-logging in drainage pipes which are used to move wastes as well as rainwater away from the city. The proposed system will continuously monitor real time water level, water flow and gas level inside the drainage pipe. Moreover, all the monitoring data will be stored in the central database for graphical representation and further analysis. In addition to that if any emergency arises in the drainage system, an alert will be sent directly to the nearest maintenance office. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18949v1-abstract-full').style.display = 'none'; document.getElementById('2403.18949v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Global Conference on Technology and Information Management</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05353">arXiv:2403.05353</a> <span> [<a href="https://arxiv.org/pdf/2403.05353">pdf</a>, <a href="https://arxiv.org/format/2403.05353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICCIT60459.2023.10441274">10.1109/ICCIT60459.2023.10441274 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Hybridized Convolutional Neural Networks and Long Short-Term Memory for Improved Alzheimer's Disease Diagnosis from MRI Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khatun%2C+M">Maleka Khatun</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Rifat%2C+H+R">Habibur Rahman Rifat</a>, <a href="/search/cs?searchtype=author&query=Shahid%2C+M+S+B">Md. Shamim Bin Shahid</a>, <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. 
Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05353v1-abstract-short" style="display: inline;"> Brain-related diseases are more sensitive than other diseases due to several factors, including the complexity of surgical procedures, high costs, and other challenges. Alzheimer's disease is a common brain disorder that causes memory loss and the shrinking of brain cells. Early detection is critical for providing proper treatment to patients. However, identifying Alzheimer's at an early stage usi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05353v1-abstract-full').style.display = 'inline'; document.getElementById('2403.05353v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05353v1-abstract-full" style="display: none;"> Brain-related diseases are more sensitive than other diseases due to several factors, including the complexity of surgical procedures, high costs, and other challenges. Alzheimer's disease is a common brain disorder that causes memory loss and the shrinking of brain cells. Early detection is critical for providing proper treatment to patients. However, identifying Alzheimer's at an early stage using manual scanning of CT or MRI scans is challenging. Therefore, researchers have delved into the exploration of computer-aided systems, employing Machine Learning and Deep Learning methodologies, which entail the training of datasets to detect Alzheimer's disease. This study aims to present a hybrid model that combines a CNN model's feature extraction capabilities with an LSTM model's detection capabilities. 
This study has applied the transfer learning called VGG16 in the hybrid model to extract features from MRI images. The LSTM detects features between the convolution layer and the fully connected layer. The output layer of the fully connected layer uses the softmax function. The training of the hybrid model involved utilizing the ADNI dataset. The trial findings revealed that the model achieved a level of accuracy of 98.8%, a sensitivity rate of 100%, and a specificity rate of 76%. The proposed hybrid model outperforms its contemporary CNN counterparts, showcasing a superior performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05353v1-abstract-full').style.display = 'none'; document.getElementById('2403.05353v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted In The 26th International Conference on Computer and Information Technology (ICCIT) On 13-15 December 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.04786">arXiv:2403.04786</a> <span> [<a href="https://arxiv.org/pdf/2403.04786">pdf</a>, <a href="https://arxiv.org/format/2403.04786">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Breaking Down the Defenses: A Comparative Survey of Attacks on Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chowdhury%2C+A+G">Arijit Ghosh Chowdhury</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mofijul Islam</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+V">Vaibhav Kumar</a>, <a href="/search/cs?searchtype=author&query=Shezan%2C+F+H">Faysal Hossain Shezan</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+V">Vaibhav Kumar</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+V">Vinija Jain</a>, <a href="/search/cs?searchtype=author&query=Chadha%2C+A">Aman Chadha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.04786v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have become a cornerstone in the field of Natural Language Processing (NLP), offering transformative capabilities in understanding and generating human-like text. 
However, with their rising prominence, the security and vulnerability aspects of these models have garnered significant attention. This paper presents a comprehensive survey of the various forms of attacks ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04786v2-abstract-full').style.display = 'inline'; document.getElementById('2403.04786v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.04786v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have become a cornerstone in the field of Natural Language Processing (NLP), offering transformative capabilities in understanding and generating human-like text. However, with their rising prominence, the security and vulnerability aspects of these models have garnered significant attention. This paper presents a comprehensive survey of the various forms of attacks targeting LLMs, discussing the nature and mechanisms of these attacks, their potential impacts, and current defense strategies. We delve into topics such as adversarial attacks that aim to manipulate model outputs, data poisoning that affects model training, and privacy concerns related to training data exploitation. The paper also explores the effectiveness of different attack methodologies, the resilience of LLMs against these attacks, and the implications for model integrity and user trust. By examining the latest research, we provide insights into the current landscape of LLM vulnerabilities and defense mechanisms. Our objective is to offer a nuanced understanding of LLM attacks, foster awareness within the AI community, and inspire robust solutions to mitigate these risks in future developments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04786v2-abstract-full').style.display = 'none'; document.getElementById('2403.04786v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17807">arXiv:2402.17807</a> <span> [<a href="https://arxiv.org/pdf/2402.17807">pdf</a>, <a href="https://arxiv.org/format/2402.17807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exploring Gene Regulatory Interaction Networks and predicting therapeutic molecules for Hypopharyngeal Cancer and EGFR-mutated lung adenocarcinoma </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhattacharjya%2C+A">Abanti Bhattacharjya</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a>, <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. 
Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Azad%2C+A">AKM Azad</a>, <a href="/search/cs?searchtype=author&query=Aryal%2C+S">Sunil Aryal</a>, <a href="/search/cs?searchtype=author&query=Paul%2C+B+K">Bikash Kumar Paul</a>, <a href="/search/cs?searchtype=author&query=Tasnim%2C+W">Wahia Tasnim</a>, <a href="/search/cs?searchtype=author&query=Almoyad%2C+M+A+A">Muhammad Ali Abdulllah Almoyad</a>, <a href="/search/cs?searchtype=author&query=Moni%2C+M+A">Mohammad Ali Moni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17807v1-abstract-short" style="display: inline;"> With the advent of Information technology, the Bioinformatics research field is becoming increasingly attractive to researchers and academicians. The recent development of various Bioinformatics toolkits has facilitated the rapid processing and analysis of vast quantities of biological data for human perception. Most studies focus on locating two connected diseases and making some observations to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17807v1-abstract-full').style.display = 'inline'; document.getElementById('2402.17807v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17807v1-abstract-full" style="display: none;"> With the advent of Information technology, the Bioinformatics research field is becoming increasingly attractive to researchers and academicians. The recent development of various Bioinformatics toolkits has facilitated the rapid processing and analysis of vast quantities of biological data for human perception. Most studies focus on locating two connected diseases and making some observations to construct diverse gene regulatory interaction networks, a forerunner to general drug design for curing illness. 
For instance, Hypopharyngeal cancer is a disease that is associated with EGFR-mutated lung adenocarcinoma. In this study, we select EGFR-mutated lung adenocarcinoma and Hypopharyngeal cancer by finding the Lung metastases in hypopharyngeal cancer. To conduct this study, we collect Microarray datasets from GEO (Gene Expression Omnibus), an online database controlled by NCBI. Differentially expressed genes, common genes, and hub genes between the selected two diseases are detected for the succeeding move. Our research findings have suggested common therapeutic molecules for the selected diseases based on 10 hub genes with the highest interactions according to the degree topology method and the maximum clique centrality (MCC). Our suggested therapeutic molecules will be fruitful for patients with those two diseases simultaneously. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17807v1-abstract-full').style.display = 'none'; document.getElementById('2402.17807v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted In The FEBS OPEN BIO (Q2, SCOPUS, SCIE, IF: 2.6, CS: 4.7), Wiley Journal, On FEB 25, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13277">arXiv:2402.13277</a> <span> [<a href="https://arxiv.org/pdf/2402.13277">pdf</a>, <a href="https://arxiv.org/format/2402.13277">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MLSTL-WSN: Machine Learning-based Intrusion Detection using SMOTETomek in WSNs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Sharmin%2C+S">Selina Sharmin</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Aryal%2C+S">Sunil Aryal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13277v2-abstract-short" style="display: inline;"> Wireless Sensor Networks (WSNs) play a pivotal role as infrastructures, encompassing both stationary and mobile sensors. These sensors self-organize and establish multi-hop connections for communication, collectively sensing, gathering, processing, and transmitting data about their surroundings. 
Despite their significance, WSNs face rapid and detrimental attacks that can disrupt functionality. Exi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13277v2-abstract-full').style.display = 'inline'; document.getElementById('2402.13277v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13277v2-abstract-full" style="display: none;"> Wireless Sensor Networks (WSNs) play a pivotal role as infrastructures, encompassing both stationary and mobile sensors. These sensors self-organize and establish multi-hop connections for communication, collectively sensing, gathering, processing, and transmitting data about their surroundings. Despite their significance, WSNs face rapid and detrimental attacks that can disrupt functionality. Existing intrusion detection methods for WSNs encounter challenges such as low detection rates, computational overhead, and false alarms. These issues stem from sensor node resource constraints, data redundancy, and high correlation within the network. To address these challenges, we propose an innovative intrusion detection approach that integrates Machine Learning (ML) techniques with the Synthetic Minority Oversampling Technique Tomek Link (SMOTE-TomekLink) algorithm. This blend synthesizes minority instances and eliminates Tomek links, resulting in a balanced dataset that significantly enhances detection accuracy in WSNs. Additionally, we incorporate feature scaling through standardization to render input features consistent and scalable, facilitating more precise training and detection. To counteract imbalanced WSN datasets, we employ the SMOTE-Tomek resampling technique, mitigating overfitting and underfitting issues. Our comprehensive evaluation, using the WSN Dataset (WSN-DS) containing 374,661 records, identifies the optimal model for intrusion detection in WSNs. 
The standout outcome of our research is the remarkable performance of our model. In binary, it achieves an accuracy rate of 99.78% and in multiclass, it attains an exceptional accuracy rate of 99.92%. These findings underscore the efficiency and superiority of our proposal in the context of WSN intrusion detection, showcasing its effectiveness in detecting and mitigating intrusions in WSNs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13277v2-abstract-full').style.display = 'none'; document.getElementById('2402.13277v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">International Journal of Information Security, Springer Journal - Q1, Scopus, ISI, SCIE, IF: 3.2 - Accepted on Jan 17, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13250">arXiv:2402.13250</a> <span> [<a href="https://arxiv.org/pdf/2402.13250">pdf</a>, <a href="https://arxiv.org/format/2402.13250">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Video ReCap: Recursive Captioning of Hour-Long Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+N">Ngan 
Ho</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xitong Yang</a>, <a href="/search/cs?searchtype=author&query=Nagarajan%2C+T">Tushar Nagarajan</a>, <a href="/search/cs?searchtype=author&query=Torresani%2C+L">Lorenzo Torresani</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13250v6-abstract-short" style="display: inline;"> Most video captioning models are designed to process short video clips of few seconds and output text describing low-level visual concepts (e.g., objects, scenes, atomic actions). However, most real-world videos last for minutes or hours and have a complex hierarchical structure spanning different temporal granularities. We propose Video ReCap, a recursive video captioning model that can process v… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13250v6-abstract-full').style.display = 'inline'; document.getElementById('2402.13250v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13250v6-abstract-full" style="display: none;"> Most video captioning models are designed to process short video clips of few seconds and output text describing low-level visual concepts (e.g., objects, scenes, atomic actions). However, most real-world videos last for minutes or hours and have a complex hierarchical structure spanning different temporal granularities. We propose Video ReCap, a recursive video captioning model that can process video inputs of dramatically different lengths (from 1 second to 2 hours) and output video captions at multiple hierarchy levels. The recursive video-language architecture exploits the synergy between different video hierarchies and can process hour-long videos efficiently. 
We utilize a curriculum learning training scheme to learn the hierarchical structure of videos, starting from clip-level captions describing atomic actions, then focusing on segment-level descriptions, and concluding with generating summaries for hour-long videos. Furthermore, we introduce Ego4D-HCap dataset by augmenting Ego4D with 8,267 manually collected long-range video summaries. Our recursive model can flexibly generate captions at different hierarchy levels while also being useful for other complex video understanding tasks, such as VideoQA on EgoSchema. Data, code, and models are available at: https://sites.google.com/view/vidrecap <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13250v6-abstract-full').style.display = 'none'; document.getElementById('2402.13250v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05158">arXiv:2402.05158</a> <span> [<a href="https://arxiv.org/pdf/2402.05158">pdf</a>, <a href="https://arxiv.org/format/2402.05158">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enhancement of Bengali OCR by Specialized Models and Advanced Techniques for Diverse Document Types </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rabby%2C+A+S+A">AKM Shahariar Azad Rabby</a>, <a href="/search/cs?searchtype=author&query=Ali%2C+H">Hasmot Ali</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Majedul Islam</a>, <a href="/search/cs?searchtype=author&query=Abujar%2C+S">Sheikh Abujar</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+F">Fuad Rahman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05158v1-abstract-short" style="display: inline;"> This research paper presents a unique Bengali OCR system with some capabilities. The system excels in reconstructing document layouts while preserving structure, alignment, and images. It incorporates advanced image and signature detection for accurate extraction. 
Specialized models for word segmentation cater to diverse document types, including computer-composed, letterpress, typewriter, and han… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05158v1-abstract-full').style.display = 'inline'; document.getElementById('2402.05158v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05158v1-abstract-full" style="display: none;"> This research paper presents a unique Bengali OCR system with some capabilities. The system excels in reconstructing document layouts while preserving structure, alignment, and images. It incorporates advanced image and signature detection for accurate extraction. Specialized models for word segmentation cater to diverse document types, including computer-composed, letterpress, typewriter, and handwritten documents. The system handles static and dynamic handwritten inputs, recognizing various writing styles. Furthermore, it has the ability to recognize compound characters in Bengali. Extensive data collection efforts provide a diverse corpus, while advanced technical components optimize character and word recognition. Additional contributions include image, logo, signature and table recognition, perspective correction, layout reconstruction, and a queuing module for efficient and scalable processing. The system demonstrates outstanding performance in efficient and accurate text extraction and analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05158v1-abstract-full').style.display = 'none'; document.getElementById('2402.05158v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 7 figures, 4 tables. Link to the paper: https://openaccess.thecvf.com/content/WACV2024W/WVLL/html/Rabby_Enhancement_of_Bengali_OCR_by_Specialized_Models_and_Advanced_Techniques_WACVW_2024_paper.html</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) Workshops, 2024, pp. 1102-1109 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04507">arXiv:2402.04507</a> <span> [<a href="https://arxiv.org/pdf/2402.04507">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Review of Digital Pixel Sensors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Udoy%2C+M+R+I">Md Rahatul Islam Udoy</a>, <a href="/search/cs?searchtype=author&query=Alam%2C+S">Shamiul Alam</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mazharul Islam</a>, <a href="/search/cs?searchtype=author&query=Jaiswal%2C+A">Akhilesh Jaiswal</a>, <a href="/search/cs?searchtype=author&query=Aziz%2C+A">Ahmedullah Aziz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04507v2-abstract-short" style="display: inline;"> Digital pixel sensor (DPS) has evolved as a pivotal component in modern imaging systems and has the potential to revolutionize various fields such as medical imaging, astronomy, surveillance, IoT devices, etc. 
Compared to analog pixel sensors, the DPS offers high speed and good image quality. However, the introduced intrinsic complexity within each pixel, primarily attributed to the accommodation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04507v2-abstract-full').style.display = 'inline'; document.getElementById('2402.04507v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04507v2-abstract-full" style="display: none;"> Digital pixel sensor (DPS) has evolved as a pivotal component in modern imaging systems and has the potential to revolutionize various fields such as medical imaging, astronomy, surveillance, IoT devices, etc. Compared to analog pixel sensors, the DPS offers high speed and good image quality. However, the introduced intrinsic complexity within each pixel, primarily attributed to the accommodation of the ADC circuit, engenders a substantial increase in the pixel pitch. Unfortunately, such a pronounced escalation in pixel pitch drastically undermines the feasibility of achieving high-density integration, which is an obstacle that significantly narrows down the field of potential applications. Nonetheless, designing compact conversion circuits along with strategic integration of 3D architectural paradigms can be a potential remedy to the prevailing situation. This review article presents a comprehensive overview of the vast area of DPS technology. The operating principles, advantages, and challenges of different types of DPS circuits have been analyzed. We categorize the schemes into several categories based on ADC operation. A comparative study based on different performance metrics has also been showcased for a well-rounded understanding. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04507v2-abstract-full').style.display = 'none'; document.getElementById('2402.04507v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02036">arXiv:2402.02036</a> <span> [<a href="https://arxiv.org/pdf/2402.02036">pdf</a>, <a href="https://arxiv.org/format/2402.02036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generating In-Distribution Proxy Graphs for Explaining Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhuomin Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaxing Zhang</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+J">Jingchao Ni</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoting Li</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+Y">Yuchen Bian</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mezbahul Islam</a>, <a href="/search/cs?searchtype=author&query=Mondal%2C+A+M">Ananda Mohan Mondal</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+H">Hua Wei</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+D">Dongsheng Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2402.02036v2-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs) have become a building block in graph data processing, with wide applications in critical domains. The growing needs to deploy GNNs in high-stakes applications necessitate explainability for users in the decision-making processes. A popular paradigm for the explainability of GNNs is to identify explainable subgraphs by comparing their labels with the ones of original g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02036v2-abstract-full').style.display = 'inline'; document.getElementById('2402.02036v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02036v2-abstract-full" style="display: none;"> Graph Neural Networks (GNNs) have become a building block in graph data processing, with wide applications in critical domains. The growing needs to deploy GNNs in high-stakes applications necessitate explainability for users in the decision-making processes. A popular paradigm for the explainability of GNNs is to identify explainable subgraphs by comparing their labels with the ones of original graphs. This task is challenging due to the substantial distributional shift from the original graphs in the training set to the set of explainable subgraphs, which prevents accurate prediction of labels with the subgraphs. To address it, in this paper, we propose a novel method that generates proxy graphs for explainable subgraphs that are in the distribution of training data. We introduce a parametric method that employs graph generators to produce proxy graphs. A new training objective based on information theory is designed to ensure that proxy graphs not only adhere to the distribution of training data but also preserve explanatory factors. Such generated proxy graphs can be reliably used to approximate the predictions of the labels of explainable subgraphs. 
Empirical evaluations across various datasets demonstrate our method achieves more accurate explanations for GNNs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02036v2-abstract-full').style.display = 'none'; document.getElementById('2402.02036v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to International Conference on Machine Learning (ICML 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.12262">arXiv:2401.12262</a> <span> [<a href="https://arxiv.org/pdf/2401.12262">pdf</a>, <a href="https://arxiv.org/format/2401.12262">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Machine learning-based network intrusion detection for big and imbalanced data using oversampling, stacking feature embedding and feature extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. 
Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+K+F">Khondokar Fida Hasan</a>, <a href="/search/cs?searchtype=author&query=Sharmin%2C+S">Selina Sharmin</a>, <a href="/search/cs?searchtype=author&query=Alyami%2C+S+A">Salem A. Alyami</a>, <a href="/search/cs?searchtype=author&query=Moni%2C+M+A">Mohammad Ali Moni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.12262v1-abstract-short" style="display: inline;"> Cybersecurity has emerged as a critical global concern. Intrusion Detection Systems (IDS) play a critical role in protecting interconnected networks by detecting malicious actors and activities. Machine Learning (ML)-based behavior analysis within the IDS has considerable potential for detecting dynamic cyber threats, identifying abnormalities, and identifying malicious conduct within the network.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12262v1-abstract-full').style.display = 'inline'; document.getElementById('2401.12262v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.12262v1-abstract-full" style="display: none;"> Cybersecurity has emerged as a critical global concern. Intrusion Detection Systems (IDS) play a critical role in protecting interconnected networks by detecting malicious actors and activities. Machine Learning (ML)-based behavior analysis within the IDS has considerable potential for detecting dynamic cyber threats, identifying abnormalities, and identifying malicious conduct within the network. However, as the number of data grows, dimension reduction becomes an increasingly difficult task when training ML models. 
Addressing this, our paper introduces a novel ML-based network intrusion detection model that uses Random Oversampling (RO) to address data imbalance and Stacking Feature Embedding based on clustering results, as well as Principal Component Analysis (PCA) for dimension reduction and is specifically designed for large and imbalanced datasets. This model's performance is carefully evaluated using three cutting-edge benchmark datasets: UNSW-NB15, CIC-IDS-2017, and CIC-IDS-2018. On the UNSW-NB15 dataset, our trials show that the RF and ET models achieve accuracy rates of 99.59% and 99.95%, respectively. Furthermore, using the CIC-IDS2017 dataset, DT, RF, and ET models reach 99.99% accuracy, while DT and RF models obtain 99.94% accuracy on CIC-IDS2018. These performance results continuously outperform the state-of-art, indicating significant progress in the field of network intrusion detection. This achievement demonstrates the efficacy of the suggested methodology, which can be used practically to accurately monitor and identify network traffic intrusions, thereby blocking possible threats. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12262v1-abstract-full').style.display = 'none'; document.getElementById('2401.12262v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in Journal of Big Data (Q1, IF: 8.1, SCIE) on Jan 19, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04746">arXiv:2401.04746</a> <span> [<a href="https://arxiv.org/pdf/2401.04746">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Skin Cancer Segmentation and Classification Using Vision Transformer for Automatic Analysis in Dermatoscopy-based Non-invasive Digital System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Himel%2C+G+M+S">Galib Muhammad Shahriar Himel</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Masudul Islam</a>, <a href="/search/cs?searchtype=author&query=Al-Aff%2C+K+A">Kh Abdullah Al-Aff</a>, <a href="/search/cs?searchtype=author&query=Karim%2C+S+I">Shams Ibne Karim</a>, <a href="/search/cs?searchtype=author&query=Sikder%2C+M+K+U">Md. Kabir Uddin Sikder</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04746v1-abstract-short" style="display: inline;"> Skin cancer is a global health concern, necessitating early and accurate diagnosis for improved patient outcomes. 
This study introduces a groundbreaking approach to skin cancer classification, employing the Vision Transformer, a state-of-the-art deep learning architecture renowned for its success in diverse image analysis tasks. Utilizing the HAM10000 dataset of 10,015 meticulously annotated skin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04746v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04746v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04746v1-abstract-full" style="display: none;"> Skin cancer is a global health concern, necessitating early and accurate diagnosis for improved patient outcomes. This study introduces a groundbreaking approach to skin cancer classification, employing the Vision Transformer, a state-of-the-art deep learning architecture renowned for its success in diverse image analysis tasks. Utilizing the HAM10000 dataset of 10,015 meticulously annotated skin lesion images, the model undergoes preprocessing for enhanced robustness. The Vision Transformer, adapted to the skin cancer classification task, leverages the self-attention mechanism to capture intricate spatial dependencies, achieving superior performance over traditional deep learning architectures. Segment Anything Model aids in precise segmentation of cancerous areas, attaining high IOU and Dice Coefficient. Extensive experiments highlight the model's supremacy, particularly the Google-based ViT patch-32 variant, which achieves 96.15% accuracy and showcases potential as an effective tool for dermatologists in skin cancer diagnosis, contributing to advancements in dermatological practices. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04746v1-abstract-full').style.display = 'none'; document.getElementById('2401.04746v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04666">arXiv:2401.04666</a> <span> [<a href="https://arxiv.org/pdf/2401.04666">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Benchmark Analysis of Various Pre-trained Deep Learning Models on ASSIRA Cats and Dogs Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Himel%2C+G+M+S">Galib Muhammad Shahriar Himel</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Masudul Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04666v1-abstract-short" style="display: inline;"> As the most basic application and implementation of deep learning, image classification has grown in popularity. Various datasets are provided by renowned data science communities for benchmarking machine learning algorithms and pre-trained models. 
The ASSIRA Cats & Dogs dataset is one of them and is being used in this research for its overall acceptance and benchmark standards. A comparison of va… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04666v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04666v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04666v1-abstract-full" style="display: none;"> As the most basic application and implementation of deep learning, image classification has grown in popularity. Various datasets are provided by renowned data science communities for benchmarking machine learning algorithms and pre-trained models. The ASSIRA Cats & Dogs dataset is one of them and is being used in this research for its overall acceptance and benchmark standards. A comparison of various pre-trained models is demonstrated by using different types of optimizers and loss functions. Hyper-parameters are changed to gain the best result from a model. By applying this approach, we have got higher accuracy without major changes in the training model. To run the experiment, we used three different computer architectures: a laptop equipped with NVIDIA GeForce GTX 1070, a laptop equipped with NVIDIA GeForce RTX 3080Ti, and a desktop equipped with NVIDIA GeForce RTX 3090. The acquired results demonstrate supremacy in terms of accuracy over the previously done experiments on this dataset. From this experiment, the highest accuracy which is 99.65% is gained using the NASNet Large. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04666v1-abstract-full').style.display = 'none'; document.getElementById('2401.04666v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04057">arXiv:2401.04057</a> <span> [<a href="https://arxiv.org/pdf/2401.04057">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5281/zenodo.10469839">10.5281/zenodo.10469839 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Unveiling Bias in Fairness Evaluations of Large Language Models: A Critical Literature Review of Music and Movie Recommendation Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sah%2C+C+K">Chandan Kumar Sah</a>, <a href="/search/cs?searchtype=author&query=Xiaoli%2C+L">Lian Xiaoli</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Muhammad Mirajul Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2401.04057v1-abstract-short" style="display: inline;"> The rise of generative artificial intelligence, particularly Large Language Models (LLMs), has intensified the imperative to scrutinize fairness alongside accuracy. Recent studies have begun to investigate fairness evaluations for LLMs within domains such as recommendations. Given that personalization is an intrinsic aspect of recommendation systems, its incorporation into fairness assessments is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04057v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04057v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04057v1-abstract-full" style="display: none;"> The rise of generative artificial intelligence, particularly Large Language Models (LLMs), has intensified the imperative to scrutinize fairness alongside accuracy. Recent studies have begun to investigate fairness evaluations for LLMs within domains such as recommendations. Given that personalization is an intrinsic aspect of recommendation systems, its incorporation into fairness assessments is paramount. Yet, the degree to which current fairness evaluation frameworks account for personalization remains unclear. Our comprehensive literature review aims to fill this gap by examining how existing frameworks handle fairness evaluations of LLMs, with a focus on the integration of personalization factors. Despite an exhaustive collection and analysis of relevant works, we discovered that most evaluations overlook personalization, a critical facet of recommendation systems, thereby inadvertently perpetuating unfair practices. Our findings shed light on this oversight and underscore the urgent need for more nuanced fairness evaluations that acknowledge personalization. 
Such improvements are vital for fostering equitable development within the AI community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04057v1-abstract-full').style.display = 'none'; document.getElementById('2401.04057v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.17235">arXiv:2312.17235</a> <span> [<a href="https://arxiv.org/pdf/2312.17235">pdf</a>, <a href="https://arxiv.org/format/2312.17235">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Simple LLM Framework for Long-Range Video Question-Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Ce Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+T">Taixi Lu</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziyang Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shoubin Yu</a>, <a href="/search/cs?searchtype=author&query=Bansal%2C+M">Mohit Bansal</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2312.17235v3-abstract-short" style="display: inline;"> We present LLoVi, a language-based framework for long-range video question-answering (LVQA). Unlike prior long-range video understanding methods, which are often costly and require specialized long-range video modeling design (e.g., memory queues, state-space layers, etc.), our approach uses a frame/clip-level visual captioner (e.g., BLIP2, LaViLa, LLaVA) coupled with a Large Language Model (GPT-3… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17235v3-abstract-full').style.display = 'inline'; document.getElementById('2312.17235v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.17235v3-abstract-full" style="display: none;"> We present LLoVi, a language-based framework for long-range video question-answering (LVQA). Unlike prior long-range video understanding methods, which are often costly and require specialized long-range video modeling design (e.g., memory queues, state-space layers, etc.), our approach uses a frame/clip-level visual captioner (e.g., BLIP2, LaViLa, LLaVA) coupled with a Large Language Model (GPT-3.5, GPT-4) leading to a simple yet surprisingly effective LVQA framework. Specifically, we decompose short and long-range modeling aspects of LVQA into two stages. First, we use a short-term visual captioner to generate textual descriptions of short video clips (0.5-8s in length) densely sampled from a long input video. Afterward, an LLM aggregates the densely extracted short-term captions to perform long-range temporal reasoning needed to understand the whole video and answer a question. To analyze what makes our simple framework so effective, we thoroughly evaluate various components of our system. Our empirical analysis reveals that the choice of the visual captioner and LLM is critical for good LVQA performance. 
Furthermore, we show that a specialized prompt that asks the LLM first to summarize the noisy short-term visual captions and then answer a given input question leads to a significant LVQA performance boost. On EgoSchema, which is best known as a very long-form video question-answering benchmark, our method achieves 50.3% accuracy, outperforming the previous best-performing approach by 18.1% (absolute gain). In addition, our approach outperforms the previous state-of-the-art by 4.1% and 3.1% on NeXT-QA and IntentQA. We also extend LLoVi to grounded LVQA and show that it outperforms all prior methods on the NeXT-GQA dataset. We will release our code at https://github.com/CeeZh/LLoVi. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17235v3-abstract-full').style.display = 'none'; document.getElementById('2312.17235v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024 main</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.06729">arXiv:2312.06729</a> <span> [<a href="https://arxiv.org/pdf/2312.06729">pdf</a>, <a href="https://arxiv.org/format/2312.06729">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RGNet: A Unified Clip Retrieval and Grounding Network for Long Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hannan%2C+T">Tanveer Hannan</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Seidl%2C+T">Thomas Seidl</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.06729v3-abstract-short" style="display: inline;"> Locating specific moments within long videos (20-120 minutes) presents a significant challenge, akin to finding a needle in a haystack. Adapting existing short video (5-30 seconds) grounding methods to this problem yields poor performance. Since most real life videos, such as those on YouTube and AR/VR, are lengthy, addressing this issue is crucial. 
Existing methods typically operate in two stages… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06729v3-abstract-full').style.display = 'inline'; document.getElementById('2312.06729v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.06729v3-abstract-full" style="display: none;"> Locating specific moments within long videos (20-120 minutes) presents a significant challenge, akin to finding a needle in a haystack. Adapting existing short video (5-30 seconds) grounding methods to this problem yields poor performance. Since most real life videos, such as those on YouTube and AR/VR, are lengthy, addressing this issue is crucial. Existing methods typically operate in two stages: clip retrieval and grounding. However, this disjoint process limits the retrieval module's fine-grained event understanding, crucial for specific moment detection. We propose RGNet which deeply integrates clip retrieval and grounding into a single network capable of processing long videos into multiple granular levels, e.g., clips and frames. Its core component is a novel transformer encoder, RG-Encoder, that unifies the two stages through shared features and mutual optimization. The encoder incorporates a sparse attention mechanism and an attention loss to model both granularities jointly. Moreover, we introduce a contrastive clip sampling technique to mimic the long video paradigm closely during training. RGNet surpasses prior methods, showcasing state-of-the-art performance on long video temporal grounding (LVTG) datasets MAD and Ego4D. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06729v3-abstract-full').style.display = 'none'; document.getElementById('2312.06729v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The code is released at https://github.com/Tanveer81/RGNet</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.18259">arXiv:2311.18259</a> <span> [<a href="https://arxiv.org/pdf/2311.18259">pdf</a>, <a href="https://arxiv.org/format/2311.18259">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Ego-Exo4D: Understanding Skilled Human Activity from First- and Third-Person Perspectives </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Grauman%2C+K">Kristen Grauman</a>, <a href="/search/cs?searchtype=author&query=Westbury%2C+A">Andrew Westbury</a>, <a href="/search/cs?searchtype=author&query=Torresani%2C+L">Lorenzo Torresani</a>, <a href="/search/cs?searchtype=author&query=Kitani%2C+K">Kris Kitani</a>, <a href="/search/cs?searchtype=author&query=Malik%2C+J">Jitendra Malik</a>, <a 
href="/search/cs?searchtype=author&query=Afouras%2C+T">Triantafyllos Afouras</a>, <a href="/search/cs?searchtype=author&query=Ashutosh%2C+K">Kumar Ashutosh</a>, <a href="/search/cs?searchtype=author&query=Baiyya%2C+V">Vijay Baiyya</a>, <a href="/search/cs?searchtype=author&query=Bansal%2C+S">Siddhant Bansal</a>, <a href="/search/cs?searchtype=author&query=Boote%2C+B">Bikram Boote</a>, <a href="/search/cs?searchtype=author&query=Byrne%2C+E">Eugene Byrne</a>, <a href="/search/cs?searchtype=author&query=Chavis%2C+Z">Zach Chavis</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Joya Chen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+F">Feng Cheng</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+F">Fu-Jen Chu</a>, <a href="/search/cs?searchtype=author&query=Crane%2C+S">Sean Crane</a>, <a href="/search/cs?searchtype=author&query=Dasgupta%2C+A">Avijit Dasgupta</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+J">Jing Dong</a>, <a href="/search/cs?searchtype=author&query=Escobar%2C+M">Maria Escobar</a>, <a href="/search/cs?searchtype=author&query=Forigua%2C+C">Cristhian Forigua</a>, <a href="/search/cs?searchtype=author&query=Gebreselasie%2C+A">Abrham Gebreselasie</a>, <a href="/search/cs?searchtype=author&query=Haresh%2C+S">Sanjay Haresh</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jing Huang</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Suyog Jain</a> , et al. (76 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.18259v4-abstract-short" style="display: inline;"> We present Ego-Exo4D, a diverse, large-scale multimodal multiview video dataset and benchmark challenge. 
Ego-Exo4D centers around simultaneously-captured egocentric and exocentric video of skilled human activities (e.g., sports, music, dance, bike repair). 740 participants from 13 cities worldwide performed these activities in 123 different natural scene contexts, yielding long-form captures from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.18259v4-abstract-full').style.display = 'inline'; document.getElementById('2311.18259v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.18259v4-abstract-full" style="display: none;"> We present Ego-Exo4D, a diverse, large-scale multimodal multiview video dataset and benchmark challenge. Ego-Exo4D centers around simultaneously-captured egocentric and exocentric video of skilled human activities (e.g., sports, music, dance, bike repair). 740 participants from 13 cities worldwide performed these activities in 123 different natural scene contexts, yielding long-form captures from 1 to 42 minutes each and 1,286 hours of video combined. The multimodal nature of the dataset is unprecedented: the video is accompanied by multichannel audio, eye gaze, 3D point clouds, camera poses, IMU, and multiple paired language descriptions -- including a novel "expert commentary" done by coaches and teachers and tailored to the skilled-activity domain. To push the frontier of first-person video understanding of skilled human activity, we also present a suite of benchmark tasks and their annotations, including fine-grained activity understanding, proficiency estimation, cross-view translation, and 3D hand/body pose. All resources are open sourced to fuel new research in the community. 
Project page: http://ego-exo4d-data.org/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.18259v4-abstract-full').style.display = 'none'; document.getElementById('2311.18259v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Expanded manuscript (compared to arxiv v1 from Nov 2023 and CVPR 2024 paper from June 2024) for more comprehensive dataset and benchmark presentation, plus new results on v2 data release</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12823">arXiv:2311.12823</a> <span> [<a href="https://arxiv.org/pdf/2311.12823">pdf</a>, <a href="https://arxiv.org/format/2311.12823">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICSECS58457.2023.10256323">10.1109/ICSECS58457.2023.10256323 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> EWasteNet: A Two-Stream Data Efficient Image Transformer Approach for E-Waste 
Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+N">Niful Islam</a>, <a href="/search/cs?searchtype=author&query=Jony%2C+M+M+H">Md. Mehedi Hasan Jony</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+E">Emam Hasan</a>, <a href="/search/cs?searchtype=author&query=Sutradhar%2C+S">Sunny Sutradhar</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+A">Atikur Rahman</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Motaharul Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12823v1-abstract-short" style="display: inline;"> Improper disposal of e-waste poses global environmental and health risks, raising serious concerns. The accurate classification of e-waste images is critical for efficient management and recycling. In this paper, we have presented a comprehensive dataset comprised of eight different classes of images of electronic devices named the E-Waste Vision Dataset. We have also presented EWasteNet, a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12823v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12823v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12823v1-abstract-full" style="display: none;"> Improper disposal of e-waste poses global environmental and health risks, raising serious concerns. The accurate classification of e-waste images is critical for efficient management and recycling. In this paper, we have presented a comprehensive dataset comprised of eight different classes of images of electronic devices named the E-Waste Vision Dataset. 
We have also presented EWasteNet, a novel two-stream approach for precise e-waste image classification based on a data-efficient image transformer (DeiT). The first stream of EWasteNet passes through a sobel operator that detects the edges while the second stream is directed through an Atrous Spatial Pyramid Pooling and attention block where multi-scale contextual information is captured. We train both of the streams simultaneously and their features are merged at the decision level. The DeiT is used as the backbone of both streams. Extensive analysis of the e-waste dataset indicates the usefulness of our method, providing 96% accuracy in e-waste classification. The proposed approach demonstrates significant usefulness in addressing the global concern of e-waste management. It facilitates efficient waste management and recycling by accurately classifying e-waste images, reducing health and safety hazards associated with improper disposal. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12823v1-abstract-full').style.display = 'none'; document.getElementById('2311.12823v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2023 IEEE 8th International Conference On Software Engineering and Computer Systems (ICSECS), Penang, Malaysia, 2023, pp. 
435-440 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.19830">arXiv:2310.19830</a> <span> [<a href="https://arxiv.org/pdf/2310.19830">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GalliformeSpectra: A Hen Breed Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Himel%2C+G+M+S">Galib Muhammad Shahriar Himel</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Masudul Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.19830v1-abstract-short" style="display: inline;"> This article presents a comprehensive dataset featuring ten distinct hen breeds, sourced from various regions, capturing the unique characteristics and traits of each breed. The dataset encompasses Bielefeld, Blackorpington, Brahma, Buckeye, Fayoumi, Leghorn, Newhampshire, Plymouthrock, Sussex, and Turken breeds, offering a diverse representation of poultry commonly bred worldwide. A total of 1010… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.19830v1-abstract-full').style.display = 'inline'; document.getElementById('2310.19830v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.19830v1-abstract-full" style="display: none;"> This article presents a comprehensive dataset featuring ten distinct hen breeds, sourced from various regions, capturing the unique characteristics and traits of each breed. 
The dataset encompasses Bielefeld, Blackorpington, Brahma, Buckeye, Fayoumi, Leghorn, Newhampshire, Plymouthrock, Sussex, and Turken breeds, offering a diverse representation of poultry commonly bred worldwide. A total of 1010 original JPG images were meticulously collected, showcasing the physical attributes, feather patterns, and distinctive features of each hen breed. These images were subsequently standardized, resized, and converted to PNG format for consistency within the dataset. The compilation, although unevenly distributed across the breeds, provides a rich resource, serving as a foundation for research and applications in poultry science, genetics, and agricultural studies. This dataset holds significant potential to contribute to various fields by enabling the exploration and analysis of unique characteristics and genetic traits across different hen breeds, thereby supporting advancements in poultry breeding, farming, and genetic research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.19830v1-abstract-full').style.display = 'none'; document.getElementById('2310.19830v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.13046">arXiv:2309.13046</a> <span> [<a href="https://arxiv.org/pdf/2309.13046">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Privacy Preserving Machine Learning for Behavioral Authentication Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Morshedul Islam</a>, <a href="/search/cs?searchtype=author&query=Rafiq%2C+M+A">Md Abdur Rafiq</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.13046v1-abstract-short" style="display: inline;"> A behavioral authentication (BA) system uses the behavioral characteristics of users to verify their identity claims. A BA verification algorithm can be constructed by training a neural network (NN) classifier on users' profiles. The trained NN model classifies the presented verification data, and if the classification matches the claimed identity, the verification algorithm accepts the claim. Thi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13046v1-abstract-full').style.display = 'inline'; document.getElementById('2309.13046v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.13046v1-abstract-full" style="display: none;"> A behavioral authentication (BA) system uses the behavioral characteristics of users to verify their identity claims. 
A BA verification algorithm can be constructed by training a neural network (NN) classifier on users' profiles. The trained NN model classifies the presented verification data, and if the classification matches the claimed identity, the verification algorithm accepts the claim. This classification-based approach removes the need to maintain a profile database. However, similar to other NN architectures, the NN classifier of the BA system is vulnerable to privacy attacks. To protect the privacy of training and test data used in an NN, different techniques are widely used. In this paper, our focus is on a non-crypto-based approach, and we used random projection (RP) to ensure data privacy in an NN model. RP is a distance-preserving transformation based on a random matrix. Before sharing the profiles with the verifier, users will transform their profiles by RP and keep their matrices secret. To reduce the computation load in RP, we use sparse random projection, which is very effective for low-compute devices. Along with correctness and security properties, our system can ensure the changeability property of the BA system. We also introduce an ML-based privacy attack, and our proposed system is robust against this and other privacy and security attacks. We implemented our approach on three existing behavioral BA systems and achieved an FRR below 2.0% and an FAR below 1.0%. Moreover, the machine learning-based privacy attacker can only recover below 3.0% to 12.0% of features from a portion of the projected profiles. However, these recovered features are not sufficient to know details about the users' behavioral pattern or to be used in a subsequent attack. Our approach is general and can be used in other NN-based BA systems as well as in traditional biometric systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13046v1-abstract-full').style.display = 'none'; document.getElementById('2309.13046v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.15756">arXiv:2308.15756</a> <span> [<a href="https://arxiv.org/pdf/2308.15756">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Strongly Correlated Electrons">cond-mat.str-el</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Reimagining Sense Amplifiers: Harnessing Phase Transition Materials for Current and Voltage Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mazharul Islam</a>, <a href="/search/cs?searchtype=author&query=Alam%2C+S">Shamiul Alam</a>, <a href="/search/cs?searchtype=author&query=Jahangir%2C+M+A">Mohammad Adnan Jahangir</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+G+S">Garrett S. 
Rose</a>, <a href="/search/cs?searchtype=author&query=Datta%2C+S">Suman Datta</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+V">Vijaykrishnan Narayanan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+S+K">Sumeet Kumar Gupta</a>, <a href="/search/cs?searchtype=author&query=Aziz%2C+A">Ahmedullah Aziz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.15756v1-abstract-short" style="display: inline;"> Energy-efficient sense amplifier (SA) circuits are essential for reliable detection of stored memory states in emerging memory systems. In this work, we present four novel sense amplifier (SA) topologies based on phase transition material (PTM) tailored for non-volatile memory applications. We utilize the abrupt switching and volatile hysteretic characteristics of PTMs which enables efficient and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15756v1-abstract-full').style.display = 'inline'; document.getElementById('2308.15756v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.15756v1-abstract-full" style="display: none;"> Energy-efficient sense amplifier (SA) circuits are essential for reliable detection of stored memory states in emerging memory systems. In this work, we present four novel sense amplifier (SA) topologies based on phase transition material (PTM) tailored for non-volatile memory applications. We utilize the abrupt switching and volatile hysteretic characteristics of PTMs which enables efficient and fast sensing operation in our proposed SA topologies. We provide comprehensive details of their functionality and assess how process variations impact their performance metrics. Our proposed sense amplifier topologies manifest notable performance enhancement. 
We achieve a ~67% reduction in sensing delay and a ~80% decrease in sensing power for current sensing. For voltage sensing, we achieve a ~75% reduction in sensing delay and a ~33% decrease in sensing power. Moreover, the proposed SA topologies exhibit improved variation robustness compared to conventional SAs. We also scrutinize the dependence of transistor mirroring window and PTM transition voltages on several device parameters to determine the optimum operating conditions and stance of tunability for each of the proposed SA topologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15756v1-abstract-full').style.display = 'none'; document.getElementById('2308.15756v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.15754">arXiv:2308.15754</a> <span> [<a href="https://arxiv.org/pdf/2308.15754">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> </div> </div> <p class="title is-5 mathjax"> A Deep Dive into the Design Space of a Dynamically Reconfigurable Cryogenic Spiking Neuron </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mazharul Islam</a>, <a href="/search/cs?searchtype=author&query=Alam%2C+S">Shamiul Alam</a>, <a href="/search/cs?searchtype=author&query=Schuman%2C+C+D">Catherine D Schuman</a>, <a 
href="/search/cs?searchtype=author&query=Hossain%2C+M+S">Md Shafayat Hossain</a>, <a href="/search/cs?searchtype=author&query=Aziz%2C+A">Ahmedullah Aziz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.15754v1-abstract-short" style="display: inline;"> Spiking neural network offers the most bio-realistic approach to mimic the parallelism and compactness of the human brain. A spiking neuron is the central component of an SNN which generates information-encoded spikes. We present a comprehensive design space analysis of the superconducting memristor (SM)-based electrically reconfigurable cryogenic neuron. A superconducting nanowire (SNW) connected… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15754v1-abstract-full').style.display = 'inline'; document.getElementById('2308.15754v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.15754v1-abstract-full" style="display: none;"> Spiking neural network offers the most bio-realistic approach to mimic the parallelism and compactness of the human brain. A spiking neuron is the central component of an SNN which generates information-encoded spikes. We present a comprehensive design space analysis of the superconducting memristor (SM)-based electrically reconfigurable cryogenic neuron. A superconducting nanowire (SNW) connected in parallel with an SM function as a dual-frequency oscillator and two of these oscillators can be coupled to design a dynamically tunable spiking neuron. The same neuron topology was previously proposed where a fixed resistance was used in parallel with the SNW. Replacing the fixed resistance with the SM provides an additional tuning knob with four distinct combinations of SM resistances, which improves the reconfigurability by up to ~70%. 
Utilizing an external bias current (Ibias), the spike frequency can be modulated up to ~3.5 times. Two distinct spike amplitudes (~1 V and ~1.8 V) are also achieved. Here, we perform a systematic sensitivity analysis and show that the reconfigurability can be further tuned by choosing a higher input current strength. By performing a 500-point Monte Carlo variation analysis, we find that the spike amplitude is more variation robust than spike frequency and the variation robustness can be further improved by choosing a higher Ibias. Our study provides valuable insights for further exploration of materials and circuit-level modification of the neuron that will be useful for system-level incorporation of the neuron circuit. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15754v1-abstract-full').style.display = 'none'; document.getElementById('2308.15754v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.13143">arXiv:2307.13143</a> <span> [<a href="https://arxiv.org/pdf/2307.13143">pdf</a>, <a href="https://arxiv.org/ps/2307.13143">ps</a>, <a href="https://arxiv.org/format/2307.13143">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSE.2011.26">10.1109/TSE.2011.26 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Evaluation and Measurement of Software Process Improvement -- A Systematic Literature Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Unterkalmsteiner%2C+M">Michael Unterkalmsteiner</a>, <a href="/search/cs?searchtype=author&query=Gorschek%2C+T">Tony Gorschek</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+A+K+M+M">A. K. M. Moinul Islam</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+C+K">Chow Kian Cheng</a>, <a href="/search/cs?searchtype=author&query=Permadi%2C+R+B">Rahadian Bayu Permadi</a>, <a href="/search/cs?searchtype=author&query=Feldt%2C+R">Robert Feldt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.13143v1-abstract-short" style="display: inline;"> BACKGROUND: Software Process Improvement (SPI) is a systematic approach to increase the efficiency and effectiveness of a software development organization and to enhance software products. 
OBJECTIVE: This paper aims to identify and characterize evaluation strategies and measurements used to assess the impact of different SPI initiatives. METHOD: The systematic literature review includes 148 paper… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13143v1-abstract-full').style.display = 'inline'; document.getElementById('2307.13143v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.13143v1-abstract-full" style="display: none;"> BACKGROUND: Software Process Improvement (SPI) is a systematic approach to increase the efficiency and effectiveness of a software development organization and to enhance software products. OBJECTIVE: This paper aims to identify and characterize evaluation strategies and measurements used to assess the impact of different SPI initiatives. METHOD: The systematic literature review includes 148 papers published between 1991 and 2008. The selected papers were classified according to SPI initiative, applied evaluation strategies, and measurement perspectives. Potential confounding factors interfering with the evaluation of the improvement effort were assessed. RESULTS: Seven distinct evaluation strategies were identified, wherein the most common one, "Pre-Post Comparison" was applied in 49 percent of the inspected papers. Quality was the most measured attribute (62 percent), followed by Cost (41 percent), and Schedule (18 percent). Looking at measurement perspectives, "Project" represents the majority with 66 percent. CONCLUSION: The evaluation validity of SPI initiatives is challenged by the scarce consideration of potential confounding factors, particularly given that "Pre-Post Comparison" was identified as the most common evaluation strategy, and the inaccurate descriptions of the evaluation context. 
Measurements to assess the short and mid-term impact of SPI initiatives prevail, whereas long-term measurements in terms of customer satisfaction and return on investment tend to be less used. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13143v1-abstract-full').style.display = 'none'; document.getElementById('2307.13143v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Trans. Software Eng. 38(2): 398-424 (2012) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.13089">arXiv:2307.13089</a> <span> [<a href="https://arxiv.org/pdf/2307.13089">pdf</a>, <a href="https://arxiv.org/ps/2307.13089">ps</a>, <a href="https://arxiv.org/format/2307.13089">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1002/smr.1637">10.1002/smr.1637 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A conceptual framework for SPI evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Unterkalmsteiner%2C+M">Michael Unterkalmsteiner</a>, <a href="/search/cs?searchtype=author&query=Gorschek%2C+T">Tony Gorschek</a>, <a 
href="/search/cs?searchtype=author&query=Islam%2C+A+K+M+M">A. K. M. Moinul Islam</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+C+K">Chow Kian Cheng</a>, <a href="/search/cs?searchtype=author&query=Permadi%2C+R+B">Rahadian Bayu Permadi</a>, <a href="/search/cs?searchtype=author&query=Feldt%2C+R">Robert Feldt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.13089v1-abstract-short" style="display: inline;"> Software Process Improvement (SPI) encompasses the analysis and modification of the processes within software development, aimed at improving key areas that contribute to the organizations' goals. The task of evaluating whether the selected improvement path meets these goals is challenging. On the basis of the results of a systematic literature review on SPI measurement and evaluation practices, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13089v1-abstract-full').style.display = 'inline'; document.getElementById('2307.13089v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.13089v1-abstract-full" style="display: none;"> Software Process Improvement (SPI) encompasses the analysis and modification of the processes within software development, aimed at improving key areas that contribute to the organizations' goals. The task of evaluating whether the selected improvement path meets these goals is challenging. On the basis of the results of a systematic literature review on SPI measurement and evaluation practices, we developed a framework (SPI Measurement and Evaluation Framework (SPI-MEF)) that supports the planning and implementation of SPI evaluations. SPI-MEF guides the practitioner in scoping the evaluation, determining measures, and performing the assessment. 
SPI-MEF does not assume a specific approach to process improvement and can be integrated in existing measurement programs, refocusing the assessment on evaluating the improvement initiative's outcome. Sixteen industry and academic experts evaluated the framework's usability and capability to support practitioners, providing additional insights that were integrated in the application guidelines of the framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13089v1-abstract-full').style.display = 'none'; document.getElementById('2307.13089v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> J. Softw. Evol. Process. 
26(2): 251-279 (2014) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06124">arXiv:2306.06124</a> <span> [<a href="https://arxiv.org/pdf/2306.06124">pdf</a>, <a href="https://arxiv.org/format/2306.06124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised clustering of disturbances in power systems via deep convolutional autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Maidul Islam</a>, <a href="/search/cs?searchtype=author&query=Faruque%2C+M+O">Md Omar Faruque</a>, <a href="/search/cs?searchtype=author&query=Butterfield%2C+J">Joshua Butterfield</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+G">Gaurav Singh</a>, <a href="/search/cs?searchtype=author&query=Cooke%2C+T+A">Thomas A. Cooke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06124v1-abstract-short" style="display: inline;"> Power quality (PQ) events are recorded by PQ meters whenever anomalous events are detected on the power grid. Using neural networks with machine learning can aid in accurately classifying the recorded waveforms and help power system engineers diagnose and rectify the root causes of problems. 
However, many of the waveforms captured during a disturbance in the power system need to be labeled for sup… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06124v1-abstract-full').style.display = 'inline'; document.getElementById('2306.06124v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06124v1-abstract-full" style="display: none;"> Power quality (PQ) events are recorded by PQ meters whenever anomalous events are detected on the power grid. Using neural networks with machine learning can aid in accurately classifying the recorded waveforms and help power system engineers diagnose and rectify the root causes of problems. However, many of the waveforms captured during a disturbance in the power system need to be labeled for supervised learning, leaving a large number of data recordings for engineers to process manually or go unseen. This paper presents an autoencoder and K-means clustering-based unsupervised technique that can be used to cluster PQ events into categories like sag, interruption, transients, normal, and harmonic distortion to enable filtering of anomalous waveforms from recurring or normal waveforms. The method is demonstrated using three-phase, field-obtained voltage waveforms recorded in a distribution grid. First, a convolutional autoencoder compresses the input signals into a set of lower feature dimensions which, after further processing, is passed to the K-means algorithm to identify data clusters. Using a small, labeled dataset, numerical labels are then assigned to events based on a cosine similarity analysis. Finally, the study analyzes the clusters using the t-distributed stochastic neighbor embedding (t-SNE) visualization tool, demonstrating that the technique can help investigate a large number of captured events in a quick manner. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06124v1-abstract-full').style.display = 'none'; document.getElementById('2306.06124v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.12844">arXiv:2305.12844</a> <span> [<a href="https://arxiv.org/pdf/2305.12844">pdf</a>, <a href="https://arxiv.org/format/2305.12844">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Optimized Ensemble Deep Learning Model For Brain Tumor Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.12844v2-abstract-short" style="display: inline;"> Brain tumors present a grave risk to human life, demanding precise and timely diagnosis for effective treatment. 
Inaccurate identification of brain tumors can significantly diminish life expectancy, underscoring the critical need for precise diagnostic methods. Manual identification of brain tumors within vast Magnetic Resonance Imaging (MRI) image datasets is arduous and time-consuming. Thus, the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12844v2-abstract-full').style.display = 'inline'; document.getElementById('2305.12844v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.12844v2-abstract-full" style="display: none;"> Brain tumors present a grave risk to human life, demanding precise and timely diagnosis for effective treatment. Inaccurate identification of brain tumors can significantly diminish life expectancy, underscoring the critical need for precise diagnostic methods. Manual identification of brain tumors within vast Magnetic Resonance Imaging (MRI) image datasets is arduous and time-consuming. Thus, the development of a reliable deep learning (DL) model is essential to enhance diagnostic accuracy and ultimately save lives. This study introduces an innovative optimization-based deep ensemble approach employing transfer learning (TL) to efficiently classify brain tumors. Our methodology includes meticulous preprocessing, reconstruction of TL architectures, fine-tuning, and ensemble DL models utilizing weighted optimization techniques such as Genetic Algorithm-based Weight Optimization (GAWO) and Grid Search-based Weight Optimization (GSWO). Experimentation is conducted on the Figshare Contrast-Enhanced MRI (CE-MRI) brain tumor dataset, comprising 3064 images. Our approach achieves notable accuracy scores, with Xception, ResNet50V2, ResNet152V2, InceptionResNetV2, GAWO, and GSWO attaining 99.42%, 98.37%, 98.22%, 98.26%, 99.71%, and 99.76% accuracy, respectively. 
Notably, GSWO demonstrates superior accuracy, averaging 99.76% accuracy across five folds on the Figshare CE-MRI brain tumor dataset. The comparative analysis highlights the significant performance enhancement of our proposed model over existing counterparts. In conclusion, our optimized deep ensemble model exhibits exceptional accuracy in swiftly classifying brain tumors. Furthermore, it has the potential to assist neurologists and clinicians in making accurate and immediate diagnostic decisions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12844v2-abstract-full').style.display = 'none'; document.getElementById('2305.12844v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.06015">arXiv:2304.06015</a> <span> [<a href="https://arxiv.org/pdf/2304.06015">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Improved Heart Disease Prediction Using Stacked Ensemble Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. 
Maidul Islam</a>, <a href="/search/cs?searchtype=author&query=Tania%2C+T+N">Tanzina Nasrin Tania</a>, <a href="/search/cs?searchtype=author&query=Akter%2C+S">Sharmin Akter</a>, <a href="/search/cs?searchtype=author&query=Shakib%2C+K+H">Kazi Hassan Shakib</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.06015v1-abstract-short" style="display: inline;"> Heart disorder has just overtaken cancer as the world's biggest cause of mortality. Several cardiac failures, heart disease mortality, and diagnostic costs can all be reduced with early identification and treatment. Medical data is collected in large quantities by the healthcare industry, but it is not well mined. The discovery of previously unknown patterns and connections in this information can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06015v1-abstract-full').style.display = 'inline'; document.getElementById('2304.06015v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.06015v1-abstract-full" style="display: none;"> Heart disorder has just overtaken cancer as the world's biggest cause of mortality. Several cardiac failures, heart disease mortality, and diagnostic costs can all be reduced with early identification and treatment. Medical data is collected in large quantities by the healthcare industry, but it is not well mined. The discovery of previously unknown patterns and connections in this information can help with an improved decision when it comes to forecasting heart disorder risk. In the proposed study, we constructed an ML-based diagnostic system for heart illness forecasting, using a heart disorder dataset. 
We used data preprocessing techniques like outlier detection and removal, checking and removing missing entries, feature normalization, cross-validation, nine classification algorithms like RF, MLP, KNN, ETC, XGB, SVC, ADB, DT, and GBM, and eight classifier measuring performance metrics like ramification accuracy, precision, F1 score, specificity, ROC, sensitivity, log-loss, and Matthews' correlation coefficient, as well as eight classification performance evaluations. Our method can easily differentiate between people who have cardiac disease and those are normal. Receiver optimistic curves and also the region under the curves were determined by every classifier. Most of the classifiers, pretreatment strategies, validation methods, and performance assessment metrics for classification models have been discussed in this study. The performance of the proposed scheme has been confirmed, utilizing all of its capabilities. In this work, the impact of clinical decision support systems was evaluated using a stacked ensemble approach that included these nine algorithms <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06015v1-abstract-full').style.display = 'none'; document.getElementById('2304.06015v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 5 figures and submitted to Springer Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.14427">arXiv:2212.14427</a> <span> [<a href="https://arxiv.org/pdf/2212.14427">pdf</a>, <a href="https://arxiv.org/format/2212.14427">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Movie Scene Detection using State-Space Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+M">Mahmudul Hasan</a>, <a href="/search/cs?searchtype=author&query=Athrey%2C+K+S">Kishan Shamsundar Athrey</a>, <a href="/search/cs?searchtype=author&query=Braskich%2C+T">Tony Braskich</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.14427v2-abstract-short" style="display: inline;"> The ability to distinguish between different movie scenes is critical for understanding the storyline of a movie. However, accurately detecting movie scenes is often challenging as it requires the ability to reason over very long movie segments. This is in contrast to most existing video recognition models, which are typically designed for short-range video analysis. 
This work proposes a State-Spa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.14427v2-abstract-full').style.display = 'inline'; document.getElementById('2212.14427v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.14427v2-abstract-full" style="display: none;"> The ability to distinguish between different movie scenes is critical for understanding the storyline of a movie. However, accurately detecting movie scenes is often challenging as it requires the ability to reason over very long movie segments. This is in contrast to most existing video recognition models, which are typically designed for short-range video analysis. This work proposes a State-Space Transformer model that can efficiently capture dependencies in long movie videos for accurate movie scene detection. Our model, dubbed TranS4mer, is built using a novel S4A building block, which combines the strengths of structured state-space sequence (S4) and self-attention (A) layers. Given a sequence of frames divided into movie shots (uninterrupted periods where the camera position does not change), the S4A block first applies self-attention to capture short-range intra-shot dependencies. Afterward, the state-space operation in the S4A block is used to aggregate long-range inter-shot cues. The final TranS4mer model, which can be trained end-to-end, is obtained by stacking the S4A blocks one after the other multiple times. Our proposed TranS4mer outperforms all prior methods in three movie scene detection datasets, including MovieNet, BBC, and OVSD, while also being $2\times$ faster and requiring $3\times$ less GPU memory than standard Transformer models. We will release our code and models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.14427v2-abstract-full').style.display = 'none'; document.getElementById('2212.14427v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2023. Code: https://github.com/md-mohaiminul/TranS4mer</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.13835">arXiv:2212.13835</a> <span> [<a href="https://arxiv.org/pdf/2212.13835">pdf</a>, <a href="https://arxiv.org/format/2212.13835">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Representation Learning in Deep RL via Discrete Information Bottleneck </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+R">Riashat Islam</a>, <a href="/search/cs?searchtype=author&query=Zang%2C+H">Hongyu Zang</a>, <a href="/search/cs?searchtype=author&query=Tomar%2C+M">Manan Tomar</a>, <a href="/search/cs?searchtype=author&query=Didolkar%2C+A">Aniket Didolkar</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mofijul Islam</a>, <a href="/search/cs?searchtype=author&query=Arnob%2C+S+Y">Samin Yeasar Arnob</a>, <a href="/search/cs?searchtype=author&query=Iqbal%2C+T">Tariq Iqbal</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xin Li</a>, 
<a href="/search/cs?searchtype=author&query=Goyal%2C+A">Anirudh Goyal</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a>, <a href="/search/cs?searchtype=author&query=Lamb%2C+A">Alex Lamb</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.13835v2-abstract-short" style="display: inline;"> Several self-supervised representation learning methods have been proposed for reinforcement learning (RL) with rich observations. For real-world applications of RL, recovering underlying latent states is crucial, particularly when sensory inputs contain irrelevant and exogenous information. In this work, we study how information bottlenecks can be used to construct latent states efficiently in th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13835v2-abstract-full').style.display = 'inline'; document.getElementById('2212.13835v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.13835v2-abstract-full" style="display: none;"> Several self-supervised representation learning methods have been proposed for reinforcement learning (RL) with rich observations. For real-world applications of RL, recovering underlying latent states is crucial, particularly when sensory inputs contain irrelevant and exogenous information. In this work, we study how information bottlenecks can be used to construct latent states efficiently in the presence of task-irrelevant information. We propose architectures that utilize variational and discrete information bottlenecks, coined as RepDIB, to learn structured factorized representations. Exploiting the expressiveness bought by factorized representations, we introduce a simple, yet effective, bottleneck that can be integrated with any existing self-supervised objective for RL. 
We demonstrate this across several online and offline RL benchmarks, along with a real robot arm task, where we find that compressed representations with RepDIB can lead to strong performance improvements, as the learned bottlenecks help predict only the relevant state while ignoring irrelevant information. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13835v2-abstract-full').style.display = 'none'; document.getElementById('2212.13835v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AISTATS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.04546">arXiv:2212.04546</a> <span> [<a href="https://arxiv.org/pdf/2212.04546">pdf</a>, <a href="https://arxiv.org/format/2212.04546">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.jisa.2022.103405">10.1016/j.jisa.2022.103405 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Dependable Hybrid Machine 
Learning Model for Network Intrusion Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+K+F">Khondokar Fida Hasan</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a>, <a href="/search/cs?searchtype=author&query=Akhter%2C+A">Arnisha Akhter</a>, <a href="/search/cs?searchtype=author&query=Yousuf%2C+M+A">Mohammad Abu Yousuf</a>, <a href="/search/cs?searchtype=author&query=Alharbi%2C+F">Fares Alharbi</a>, <a href="/search/cs?searchtype=author&query=Moni%2C+M+A">Mohammad Ali Moni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.04546v2-abstract-short" style="display: inline;"> Network intrusion detection systems (NIDSs) play an important role in computer network security. There are several detection mechanisms where anomaly-based automated detection outperforms others significantly. Amid the sophistication and growing number of attacks, dealing with large amounts of data is a recognized issue in the development of anomaly-based NIDS. However, do current models meet the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04546v2-abstract-full').style.display = 'inline'; document.getElementById('2212.04546v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.04546v2-abstract-full" style="display: none;"> Network intrusion detection systems (NIDSs) play an important role in computer network security. There are several detection mechanisms where anomaly-based automated detection outperforms others significantly. 
Amid the sophistication and growing number of attacks, dealing with large amounts of data is a recognized issue in the development of anomaly-based NIDS. However, do current models meet the needs of today's networks in terms of required accuracy and dependability? In this research, we propose a new hybrid model that combines machine learning and deep learning to increase detection rates while securing dependability. Our proposed method ensures efficient pre-processing by combining SMOTE for data balancing and XGBoost for feature selection. We compared our developed method to various machine learning and deep learning algorithms to find a more efficient algorithm to implement in the pipeline. Furthermore, we chose the most effective model for network intrusion based on a set of benchmarked performance analysis criteria. Our method produces excellent results when tested on two datasets, KDDCUP'99 and CIC-MalMem-2022, with an accuracy of 99.99% and 100% for KDDCUP'99 and CIC-MalMem-2022, respectively, and no overfitting or Type-1 and Type-2 issues. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04546v2-abstract-full').style.display = 'none'; document.getElementById('2212.04546v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in the Journal of Information Security and Applications (Scopus, Web of Science (SCIE) Journal, Quartile: Q1, Site Score: 7.6, Impact Factor: 4.96) on 7 December 2022</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Information Security and Applications, Volume 72, Pages 103405, Year 2023, ISSN 2214-2126 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.14235">arXiv:2211.14235</a> <span> [<a href="https://arxiv.org/pdf/2211.14235">pdf</a>, <a href="https://arxiv.org/format/2211.14235">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s00521-023-08493-1">10.1007/s00521-023-08493-1 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DoubleU-NetPlus: A Novel Attention and Context Guided Dual U-Net with Multi-Scale Residual Feature Fusion Network for Semantic Segmentation of Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahmed%2C+M+R">Md. 
Rayhan Ahmed</a>, <a href="/search/cs?searchtype=author&query=Ashrafi%2C+A+F">Adnan Ferdous Ashrafi</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+R+U">Raihan Uddin Ahmed</a>, <a href="/search/cs?searchtype=author&query=Shatabda%2C+S">Swakkhar Shatabda</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+A+K+M+M">A. K. M. Muzahidul Islam</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+S">Salekul Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.14235v1-abstract-short" style="display: inline;"> Accurate segmentation of the region of interest in medical images can provide an essential pathway for devising effective treatment plans for life-threatening diseases. It is still challenging for U-Net, and its state-of-the-art variants, such as CE-Net and DoubleU-Net, to effectively model the higher-level output feature maps of the convolutional units of the network mostly due to the presence of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14235v1-abstract-full').style.display = 'inline'; document.getElementById('2211.14235v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.14235v1-abstract-full" style="display: none;"> Accurate segmentation of the region of interest in medical images can provide an essential pathway for devising effective treatment plans for life-threatening diseases. It is still challenging for U-Net, and its state-of-the-art variants, such as CE-Net and DoubleU-Net, to effectively model the higher-level output feature maps of the convolutional units of the network mostly due to the presence of various scales of the region of interest, intricacy of context environments, ambiguous boundaries, and multiformity of textures in medical images. 
In this paper, we exploit multi-contextual features and several attention strategies to increase networks' ability to model discriminative feature representation for more accurate medical image segmentation, and we present a novel dual U-Net-based architecture named DoubleU-NetPlus. The DoubleU-NetPlus incorporates several architectural modifications. In particular, we integrate EfficientNetB7 as the feature encoder module, a newly designed multi-kernel residual convolution module, and an adaptive feature re-calibrating attention-based atrous spatial pyramid pooling module to progressively and precisely accumulate discriminative multi-scale high-level contextual feature maps and emphasize the salient regions. In addition, we introduce a novel triple attention gate module and a hybrid triple attention module to encourage selective modeling of relevant medical image features. Moreover, to mitigate the gradient vanishing issue and incorporate high-resolution features with deeper spatial details, the standard convolution operation is replaced with the attention-guided residual convolution operations, ... <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14235v1-abstract-full').style.display = 'none'; document.getElementById('2211.14235v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 9 figures, 4 tables, Submitted to Springer</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 92C55 (Primary) <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.6 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neural Computing and Applications, Volume 35, Pages 14379 - 14401 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.11814">arXiv:2207.11814</a> <span> [<a href="https://arxiv.org/pdf/2207.11814">pdf</a>, <a href="https://arxiv.org/format/2207.11814">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Object State Change Classification in Egocentric Videos using the Divided Space-Time Attention Mechanism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md Mohaiminul Islam</a>, <a href="/search/cs?searchtype=author&query=Bertasius%2C+G">Gedas Bertasius</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.11814v2-abstract-short" style="display: inline;"> This report describes our submission called "TarHeels" for the Ego4D: Object State Change Classification Challenge. We use a transformer-based video recognition model and leverage the Divided Space-Time Attention mechanism for classifying object state change in egocentric videos. 
Our submission achieves the second-best performance in the challenge. Furthermore, we perform an ablation study to show… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.11814v2-abstract-full').style.display = 'inline'; document.getElementById('2207.11814v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.11814v2-abstract-full" style="display: none;"> This report describes our submission called "TarHeels" for the Ego4D: Object State Change Classification Challenge. We use a transformer-based video recognition model and leverage the Divided Space-Time Attention mechanism for classifying object state change in egocentric videos. Our submission achieves the second-best performance in the challenge. Furthermore, we perform an ablation study to show that identifying object state change in egocentric videos requires temporal modeling ability. Lastly, we present several positive and negative examples to visualize our model's predictions. The code is publicly available at: https://github.com/md-mohaiminul/ObjectStateChange <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.11814v2-abstract-full').style.display = 'none'; document.getElementById('2207.11814v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2nd place winner, Ego4D challenge, CVPR 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.01088">arXiv:2206.01088</a> <span> [<a href="https://arxiv.org/pdf/2206.01088">pdf</a>, <a href="https://arxiv.org/format/2206.01088">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Machine Learning-based Lung and Colon Cancer Detection using Deep Feature Extraction and Ensemble Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talukder%2C+M+A">Md. Alamin Talukder</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+M">Md. Manowarul Islam</a>, <a href="/search/cs?searchtype=author&query=Uddin%2C+M+A">Md Ashraf Uddin</a>, <a href="/search/cs?searchtype=author&query=Akhter%2C+A">Arnisha Akhter</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+K+F">Khondokar Fida Hasan</a>, <a href="/search/cs?searchtype=author&query=Moni%2C+M+A">Mohammad Ali Moni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.01088v2-abstract-short" style="display: inline;"> Cancer is a fatal disease caused by a combination of genetic diseases and a variety of biochemical abnormalities. 
Lung and colon cancer have emerged as two of the leading causes of death and disability in humans. The histopathological detection of such malignancies is usually the most important component in determining the best course of action. Early detection of the ailment on either front consi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.01088v2-abstract-full').style.display = 'inline'; document.getElementById('2206.01088v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.01088v2-abstract-full" style="display: none;"> Cancer is a fatal disease caused by a combination of genetic diseases and a variety of biochemical abnormalities. Lung and colon cancer have emerged as two of the leading causes of death and disability in humans. The histopathological detection of such malignancies is usually the most important component in determining the best course of action. Early detection of the ailment on either front considerably decreases the likelihood of mortality. Machine learning and deep learning techniques can be utilized to speed up such cancer detection, allowing researchers to study a large number of patients in a much shorter amount of time and at a lower cost. In this research work, we introduced a hybrid ensemble feature extraction model to efficiently identify lung and colon cancer. It integrates deep feature extraction and ensemble learning with high-performance filtering for cancer image datasets. The model is evaluated on histopathological (LC25000) lung and colon datasets. According to the study findings, our hybrid model can detect lung, colon, and (lung and colon) cancer with accuracy rates of 99.05%, 100%, and 99.30%, respectively. The study's findings show that our proposed strategy outperforms existing models significantly. Thus, these models could be applicable in clinics to support the doctor in the diagnosis of cancers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.01088v2-abstract-full').style.display = 'none'; document.getElementById('2206.01088v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in the Special Issue of Expert Systems with Applications (IF:6.954, Cite:12.70) How to Cite: Md. Alamin Talukder, Md. Manowarul Islam, Md Ashraf Uddin, Arnisha Akhter, Khondokar Fida Hasan, Mohammad Ali Moni. "Machine Learning-based Lung and Colon Cancer Detection using Deep Feature Extraction and Ensemble Learning", Expert Systems with Applications. 
2022 Jun 1</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Islam%2C+M+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Islam%2C+M+M&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Islam%2C+M+M&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5
9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>