Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 147 results for author: <span class="mathjax">Hosseini, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Hosseini%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Hosseini, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Hosseini%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Hosseini, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hosseini%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hosseini%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hosseini%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Hosseini%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.11481">arXiv:2503.11481</a> <span> [<a href="https://arxiv.org/pdf/2503.11481">pdf</a>, <a href="https://arxiv.org/format/2503.11481">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> T2I-FineEval: Fine-Grained Compositional Metric for Text-to-Image Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hosseini%2C+S+M+H">Seyed Mohammad Hadi Hosseini</a>, <a href="/search/cs?searchtype=author&query=Izadi%2C+A+M">Amir Mohammad Izadi</a>, <a href="/search/cs?searchtype=author&query=Abdollahi%2C+A">Ali Abdollahi</a>, <a href="/search/cs?searchtype=author&query=Saghafian%2C+A">Armin Saghafian</a>, <a href="/search/cs?searchtype=author&query=Baghshah%2C+M+S">Mahdieh Soleymani Baghshah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.11481v1-abstract-short" style="display: inline;"> Although recent text-to-image generative models have achieved impressive performance, they still often struggle with capturing the compositional complexities of prompts including attribute binding, and spatial relationships between different entities. This misalignment is not revealed by common evaluation metrics such as CLIPScore. 
Recent works have proposed evaluation metrics that utilize Visual… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.11481v1-abstract-full').style.display = 'inline'; document.getElementById('2503.11481v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.11481v1-abstract-full" style="display: none;"> Although recent text-to-image generative models have achieved impressive performance, they still often struggle with capturing the compositional complexities of prompts including attribute binding, and spatial relationships between different entities. This misalignment is not revealed by common evaluation metrics such as CLIPScore. Recent works have proposed evaluation metrics that utilize Visual Question Answering (VQA) by decomposing prompts into questions about the generated image for more robust compositional evaluation. Although these methods align better with human evaluations, they still fail to fully cover the compositionality within the image. To address this, we propose a novel metric that breaks down images into components, and texts into fine-grained questions about the generated image for evaluation. Our method outperforms previous state-of-the-art metrics, demonstrating its effectiveness in evaluating text-to-image generative models. Code is available at https://github.com/hadi-hosseini/ T2I-FineEval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.11481v1-abstract-full').style.display = 'none'; document.getElementById('2503.11481v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ECCV 2024 Workshop EVAL-FoMo</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.06506">arXiv:2503.06506</a> <span> [<a href="https://arxiv.org/pdf/2503.06506">pdf</a>, <a href="https://arxiv.org/format/2503.06506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fine-Grained Alignment and Noise Refinement for Compositional Text-to-Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Izadi%2C+A+M">Amir Mohammad Izadi</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+S+M+H">Seyed Mohammad Hadi Hosseini</a>, <a href="/search/cs?searchtype=author&query=Tabar%2C+S+V">Soroush Vafaie Tabar</a>, <a href="/search/cs?searchtype=author&query=Abdollahi%2C+A">Ali Abdollahi</a>, <a href="/search/cs?searchtype=author&query=Saghafian%2C+A">Armin Saghafian</a>, <a href="/search/cs?searchtype=author&query=Baghshah%2C+M+S">Mahdieh Soleymani Baghshah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.06506v1-abstract-short" style="display: inline;"> Text-to-image generative models have made significant advancements in recent years; however, accurately capturing intricate details in textual prompts, such as entity missing, attribute binding errors, and incorrect relationships remains a formidable challenge. In response, we present an innovative, training-free method that directly addresses these challenges by incorporating tailored objectives… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06506v1-abstract-full').style.display = 'inline'; document.getElementById('2503.06506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.06506v1-abstract-full" style="display: none;"> Text-to-image generative models have made significant advancements in recent years; however, accurately capturing intricate details in textual prompts, such as entity missing, attribute binding errors, and incorrect relationships remains a formidable challenge. In response, we present an innovative, training-free method that directly addresses these challenges by incorporating tailored objectives to account for textual constraints. Unlike layout-based approaches that enforce rigid structures and limit diversity, our proposed approach offers a more flexible arrangement of the scene by imposing just the extracted constraints from the text, without any unnecessary additions. These constraints are formulated as losses-entity missing, entity mixing, attribute binding, and spatial relationships, integrated into a unified loss that is applied in the first generation stage. Furthermore, we introduce a feedback-driven system for fine-grained initial noise refinement. This system integrates a verifier that evaluates the generated image, identifies inconsistencies, and provides corrective feedback. 
2. arXiv:2503.06506 [pdf, other] (cs.CV, cs.LG)
   Fine-Grained Alignment and Noise Refinement for Compositional Text-to-Image Generation
   Authors: Amir Mohammad Izadi, Seyed Mohammad Hadi Hosseini, Soroush Vafaie Tabar, Ali Abdollahi, Armin Saghafian, Mahdieh Soleymani Baghshah
   Abstract: Text-to-image generative models have made significant advancements in recent years; however, accurately capturing intricate details in textual prompts, such as entity missing, attribute binding errors, and incorrect relationships, remains a formidable challenge. In response, we present an innovative, training-free method that directly addresses these challenges by incorporating tailored objectives to account for textual constraints. Unlike layout-based approaches that enforce rigid structures and limit diversity, our proposed approach offers a more flexible arrangement of the scene by imposing just the constraints extracted from the text, without any unnecessary additions. These constraints are formulated as losses (entity missing, entity mixing, attribute binding, and spatial relationships) integrated into a unified loss that is applied in the first generation stage. Furthermore, we introduce a feedback-driven system for fine-grained initial noise refinement. This system integrates a verifier that evaluates the generated image, identifies inconsistencies, and provides corrective feedback. Leveraging this feedback, our refinement method first targets the unmet constraints by refining the faulty attention maps caused by initial noise, through the optimization of selective losses associated with these constraints. Subsequently, our unified loss function is reapplied to proceed to the second generation phase. Experimental results demonstrate that our method, relying solely on our proposed objective functions, significantly enhances compositionality, achieving a 24% improvement in human evaluation and a 25% gain in spatial relationships. Furthermore, our fine-grained noise refinement proves effective, boosting performance by up to 5%. Code is available at https://github.com/hadi-hosseini/noise-refinement.
   Submitted 9 March, 2025; originally announced March 2025.
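   Since the extracted constraints are summed into one unified objective, the aggregation step admits a minimal sketch; the per-constraint loss functions below are hypothetical stand-ins, not the paper's exact formulation.

      import torch

      def entity_missing_loss(attn, entity):           # hypothetical placeholder
          raise NotImplementedError

      def attribute_binding_loss(attn, attr, entity):  # hypothetical placeholder
          raise NotImplementedError

      def spatial_relation_loss(attn, relation):       # hypothetical placeholder
          raise NotImplementedError

      def unified_loss(attn, constraints):
          # Sum the per-constraint losses extracted from the prompt into one
          # objective, applied during the first generation stage.
          loss = torch.zeros(())
          for entity in constraints.get("entities", []):
              loss = loss + entity_missing_loss(attn, entity)
          for attr, entity in constraints.get("bindings", []):
              loss = loss + attribute_binding_loss(attn, attr, entity)
          for relation in constraints.get("relations", []):
              loss = loss + spatial_relation_loss(attn, relation)
          return loss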
3. arXiv:2503.02967 [pdf] (cs.CV, cs.LG)
   Revolutionizing Traffic Management with AI-Powered Machine Vision: A Step Toward Smart Cities
   Authors: Seyed Hossein Hosseini DolatAbadi, Sayyed Mohammad Hossein Hashemi, Mohammad Hosseini, Moein-Aldin AliHosseini
   Abstract: The rapid urbanization of cities and increasing vehicular congestion have posed significant challenges to traffic management and safety. This study explores the transformative potential of artificial intelligence (AI) and machine vision technologies in revolutionizing traffic systems. By leveraging advanced surveillance cameras and deep learning algorithms, this research proposes a system for real-time detection of vehicles, traffic anomalies, and driver behaviors. The system integrates geospatial and weather data to adapt dynamically to environmental conditions, ensuring robust performance in diverse scenarios. Using YOLOv8 and YOLOv11 models, the study achieves high accuracy in vehicle detection and anomaly recognition, optimizing traffic flow and enhancing road safety. These findings contribute to the development of intelligent traffic management solutions and align with the vision of creating smart cities with sustainable and efficient urban infrastructure.
   Submitted 4 March, 2025; originally announced March 2025.
   Comments: 6 pages, 1 figure, 2 tables; accepted to the 1st AITC conference at the University of Isfahan
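   The detection step the abstract names (YOLOv8) is available off the shelf; a minimal vehicle-detection sketch with a pretrained checkpoint via the ultralytics package follows. This is a generic illustration, not the paper's trained system, and traffic.jpg is a placeholder input.

      from ultralytics import YOLO

      model = YOLO("yolov8n.pt")        # small COCO-pretrained checkpoint
      results = model("traffic.jpg")    # run inference on one frame

      for box in results[0].boxes:
          cls_name = model.names[int(box.cls)]
          if cls_name in {"car", "truck", "bus", "motorcycle"}:
              print(cls_name, float(box.conf), box.xyxy.tolist())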
4. arXiv:2503.02512 [pdf, other] (cs.LO, cs.AI, cs.LG, cs.MA, cs.SC)
   LTL Verification of Memoryful Neural Agents
   Authors: Mehran Hosseini, Alessio Lomuscio, Nicola Paoletti
   Abstract: We present a framework for verifying Memoryful Neural Multi-Agent Systems (MN-MAS) against full Linear Temporal Logic (LTL) specifications. In MN-MAS, agents interact with a non-deterministic, partially observable environment. Examples of MN-MAS include multi-agent systems based on feed-forward and recurrent neural networks or state-space models. Different from previous approaches, we support the verification of both bounded and unbounded LTL specifications. We leverage well-established bounded model checking techniques, including lasso search and invariant synthesis, to reduce the verification problem to that of constraint solving. To solve these constraints, we develop efficient methods based on bound propagation, mixed-integer linear programming, and adaptive splitting. We evaluate the effectiveness of our algorithms in single- and multi-agent environments from the Gymnasium and PettingZoo libraries, verifying unbounded specifications for the first time and improving the verification time for bounded specifications by an order of magnitude compared to the state of the art.
   Submitted 4 March, 2025; originally announced March 2025.
   Comments: 11 pages, 2 figures, accepted at AAMAS 2025 conference
   MSC Class: 68Q60 (Primary); 68T27, 68T07, 68T37, 68T40, 68T42 (Secondary)
   ACM Class: D.2.4; F.3.1; I.2.4; I.2.11; I.2.8; F.4.1; I.2.2; I.2.3
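   The lasso search the abstract leans on exploits the fact that any ultimately periodic counterexample to an LTL property is a finite prefix followed by a loop. The toy sketch below enumerates lassos in an assumed small explicit-state system to refute G F p ("p holds infinitely often"); the paper's neural setting, with bound propagation and MILP, is far richer, so this shows only the underlying search scheme.

      transitions = {0: [1], 1: [2, 0], 2: [2]}   # toy nondeterministic system
      p = {0: True, 1: False, 2: False}           # atomic proposition per state

      def find_lasso_violating_GFp(init, k):
          # Return (prefix, loop) where the loop never visits p, i.e. a
          # counterexample to G F p, or None if none exists within bound k.
          def dfs(path):
              for nxt in transitions[path[-1]]:
                  if nxt in path:                  # closing a loop
                      loop = path[path.index(nxt):]
                      if not any(p[s] for s in loop):
                          return path[:path.index(nxt)], loop
                  elif len(path) < k:
                      found = dfs(path + [nxt])
                      if found:
                          return found
              return None
          return dfs([init])

      print(find_lasso_violating_GFp(0, 5))  # -> ([0, 1], [2]): state 2 loops, p never holds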
5. arXiv:2502.19888 [pdf, other] (cs.HC) doi: 10.1145/3706598.3713421
   Accessibility for Whom? Perceptions of Sidewalk Barriers Across Disability Groups and Implications for Designing Personalized Maps
   Authors: Chu Li, Rock Yuren Pang, Delphine Labbé, Yochai Eisenberg, Maryam Hosseini, Jon E. Froehlich
   Abstract: Despite diverse mobility needs worldwide, existing mapping tools fail to address the varied experiences of different mobility device users. This paper presents a large-scale online survey exploring how five mobility groups (users of canes, walkers, mobility scooters, manual wheelchairs, and motorized wheelchairs) perceive sidewalk barriers. Using 52 sidewalk barrier images, respondents evaluated their confidence in navigating each scenario. Our findings (N=190) reveal variations in barrier perceptions across groups, while also identifying shared concerns. To further demonstrate the value of this data, we showcase its use in two custom prototypes: a visual analytics tool and a personalized routing tool. Our survey findings and open dataset advance work in accessibility-focused maps, routing algorithms, and urban planning.
   Submitted 27 February, 2025; originally announced February 2025.
   Comments: Manuscript accepted at CHI'25

6. arXiv:2502.14634 [pdf, other] (cs.LG)
   CER: Confidence Enhanced Reasoning in LLMs
   Authors: Ali Razghandi, Seyed Mohammad Hadi Hosseini, Mahdieh Soleymani Baghshah
   Abstract: Ensuring the reliability of Large Language Models (LLMs) in complex reasoning tasks remains a formidable challenge, particularly in scenarios that demand precise mathematical calculations and knowledge-intensive open-domain generation. In this work, we introduce an uncertainty-aware framework designed to enhance the accuracy of LLM responses by systematically incorporating model confidence at critical decision points. We propose an approach that encourages multi-step reasoning in LLMs and quantifies the confidence of intermediate answers, such as numerical results in mathematical reasoning and proper nouns in open-domain generation. The overall confidence of each reasoning chain is then evaluated based on the confidence of these critical intermediate steps. Finally, we aggregate the answers of the generated response paths in a way that reflects the reliability of each generated content (as opposed to self-consistency, in which each generated chain contributes equally to majority voting). We conducted extensive experiments on five datasets (three mathematical and two open-domain) using four LLMs. The results consistently validate the effectiveness of our novel confidence aggregation method, leading to accuracy improvements of up to 7.4% and 5.8% over baseline approaches in math and open-domain generation tasks, respectively. Code is publicly available at https://github.com/Aquasar11/CER.
   Submitted 20 February, 2025; originally announced February 2025.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11049v1-abstract-full').style.display = 'none'; document.getElementById('2502.11049v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08580">arXiv:2502.08580</a> <span> [<a href="https://arxiv.org/pdf/2502.08580">pdf</a>, <a href="https://arxiv.org/format/2502.08580">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Ultrasound Image Generation using Latent Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Freiche%2C+B">Benoit Freiche</a>, <a href="/search/cs?searchtype=author&query=El-Khoury%2C+A">Anthony El-Khoury</a>, <a href="/search/cs?searchtype=author&query=Nasiri-Sarvi%2C+A">Ali Nasiri-Sarvi</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+S">Mahdi S. Hosseini</a>, <a href="/search/cs?searchtype=author&query=Garcia%2C+D">Damien Garcia</a>, <a href="/search/cs?searchtype=author&query=Basarab%2C+A">Adrian Basarab</a>, <a href="/search/cs?searchtype=author&query=Boily%2C+M">Mathieu Boily</a>, <a href="/search/cs?searchtype=author&query=Rivaz%2C+H">Hassan Rivaz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08580v1-abstract-short" style="display: inline;"> Diffusion models for image generation have been a subject of increasing interest due to their ability to generate diverse, high-quality images. Image generation has immense potential in medical imaging because open-source medical images are difficult to obtain compared to natural images, especially for rare conditions. The generated images can be used later to train classification and segmentation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08580v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08580v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08580v1-abstract-full" style="display: none;"> Diffusion models for image generation have been a subject of increasing interest due to their ability to generate diverse, high-quality images. Image generation has immense potential in medical imaging because open-source medical images are difficult to obtain compared to natural images, especially for rare conditions. The generated images can be used later to train classification and segmentation models. In this paper, we propose simulating realistic ultrasound (US) images by successive fine-tuning of large diffusion models on different publicly available databases. To do so, we fine-tuned Stable Diffusion, a state-of-the-art latent diffusion model, on BUSI (Breast US Images) an ultrasound breast image dataset. 
8. arXiv:2502.08580 [pdf, other] (cs.CV)
   Ultrasound Image Generation using Latent Diffusion Models
   Authors: Benoit Freiche, Anthony El-Khoury, Ali Nasiri-Sarvi, Mahdi S. Hosseini, Damien Garcia, Adrian Basarab, Mathieu Boily, Hassan Rivaz
   Abstract: Diffusion models for image generation have been a subject of increasing interest due to their ability to generate diverse, high-quality images. Image generation has immense potential in medical imaging because open-source medical images are difficult to obtain compared to natural images, especially for rare conditions. The generated images can be used later to train classification and segmentation models. In this paper, we propose simulating realistic ultrasound (US) images by successive fine-tuning of large diffusion models on different publicly available databases. To do so, we fine-tuned Stable Diffusion, a state-of-the-art latent diffusion model, on BUSI (Breast US Images), an ultrasound breast image dataset. We successfully generated high-quality US images of the breast using simple prompts that specify the organ and pathology, which appeared realistic to three experienced US scientists and a US radiologist. Additionally, we provided user control by conditioning the model with segmentations through ControlNet. We will release the source code at http://code.sonography.ai/ to allow fast US image generation to the scientific community.
   Submitted 12 February, 2025; originally announced February 2025.
   Comments: 6 pages, conference paper for SPIE Medical Imaging
   MSC Class: 68-06
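   Prompt-conditioned generation with such a fine-tuned latent diffusion model could look like the diffusers sketch below; the checkpoint path is a hypothetical placeholder for an ultrasound fine-tune, pending the code release announced above.

      import torch
      from diffusers import StableDiffusionPipeline

      pipe = StableDiffusionPipeline.from_pretrained(
          "path/to/ultrasound-finetuned-checkpoint",   # hypothetical checkpoint
          torch_dtype=torch.float16,
      ).to("cuda")

      # Simple prompts specifying organ and pathology, as in the abstract.
      image = pipe("breast ultrasound image with a benign mass").images[0]
      image.save("generated_us.png")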
9. arXiv:2501.12815 [pdf, other] (cs.LG, stat.ML)
   Certified Guidance for Planning with Deep Generative Models
   Authors: Francesco Giacomarra, Mehran Hosseini, Nicola Paoletti, Francesca Cairoli
   Abstract: Deep generative models, such as generative adversarial networks and diffusion models, have recently emerged as powerful tools for planning tasks and behavior synthesis in autonomous systems. Various guidance strategies have been introduced to steer the generative process toward outputs that are more likely to satisfy the planning objectives. These strategies avoid the need for model retraining but do not provide any guarantee that the generated outputs will satisfy the desired planning objectives. To address this limitation, we introduce certified guidance, an approach that modifies a generative model, without retraining it, into a new model guaranteed to satisfy a given specification with probability one. We focus on Signal Temporal Logic (STL) specifications, which are rich enough to describe nontrivial planning tasks. Our approach leverages neural network verification techniques to systematically explore the latent spaces of the generative models, identifying latent regions that are certifiably correct with respect to the STL property of interest. We evaluate the effectiveness of our method on four planning benchmarks using GANs and diffusion models. Our results confirm that certified guidance produces generative models that are always correct, unlike existing guidance methods that are not certified.
   Submitted 22 January, 2025; originally announced January 2025.
   Comments: 11 pages, 2 figures, accepted at AAMAS 2025 conference
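   A toy illustration of why this yields probability-one guarantees: once verification certifies a latent region in which every point decodes to a specification-satisfying output, sampling restricted to that region cannot miss. The axis-aligned box below is purely hypothetical; identifying certified regions is the hard part the paper addresses.

      import torch

      lo = torch.tensor([-0.5, 0.1])   # hypothetical certified box, lower corner
      hi = torch.tensor([0.5, 0.9])    # hypothetical certified box, upper corner

      def certified_sample(generator, n):
          z = lo + (hi - lo) * torch.rand(n, lo.numel())  # uniform in the box
          return generator(z)          # every output certified by construction

      print(certified_sample(lambda z: 2.0 * z, n=4))     # stand-in generator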
10. arXiv:2501.07719 [pdf, other] (cs.CL)
    Entailed Between the Lines: Incorporating Implication into NLI
    Authors: Shreya Havaldar, Hamidreza Alvari, John Palowitch, Mohammad Javad Hosseini, Senaka Buthpitiya, Alex Fabrikant
    Abstract: Much of human communication depends on implication, conveying meaning beyond literal words to express a wider range of thoughts, intentions, and feelings. For models to better understand and facilitate human communication, they must be responsive to the text's implicit meaning. We focus on Natural Language Inference (NLI), a core tool for many language tasks, and find that state-of-the-art NLI models and datasets struggle to recognize a range of cases where entailment is implied, rather than explicit from the text. We formalize implied entailment as an extension of the NLI task and introduce the Implied NLI dataset (INLI) to help today's LLMs both recognize a broader variety of implied entailments and distinguish between implicit and explicit entailment. We show how LLMs fine-tuned on INLI understand implied entailment and can generalize this understanding across datasets and domains.
    Submitted 16 January, 2025; v1 submitted 13 January, 2025; originally announced January 2025.
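    The gap the abstract describes is easy to probe with an off-the-shelf MNLI model; the sketch below scores an implied (never stated) hypothesis with roberta-large-mnli via the transformers pipeline. Such explicit-entailment models may well label this pair NEUTRAL, which is the failure mode INLI is built to expose.

      from transformers import pipeline

      nli = pipeline("text-classification", model="roberta-large-mnli")

      premise = "I had to ask my neighbor for a jump start this morning."
      hypothesis = "The speaker's car battery was dead."  # implied, never stated

      print(nli({"text": premise, "text_pair": hypothesis}))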
11. arXiv:2501.05323 [pdf, other] (cs.LG, cs.NI)
    Distributed Learning and Inference Systems: A Networking Perspective
    Authors: Hesham G. Moussa, Arashmid Akhavain, S. Maryam Hosseini, Bill McCormick
    Abstract: Machine learning models have achieved, and in some cases surpassed, human-level performance in various tasks, mainly through centralized training of static models and the use of large models stored in centralized clouds for inference. However, this centralized approach has several drawbacks, including privacy concerns, high storage demands, a single point of failure, and significant computing requirements. These challenges have driven interest in developing alternative decentralized and distributed methods for AI training and inference. Distribution introduces additional complexity, as it requires managing multiple moving parts. To address these complexities and fill a gap in the development of distributed AI systems, this work proposes a novel framework, Data and Dynamics-Aware Inference and Training Networks (DA-ITN). The different components of DA-ITN and their functions are explored, and the associated challenges and research areas are highlighted.
    Submitted 9 January, 2025; originally announced January 2025.
    Comments: This paper has been submitted to IEEE Network magazine and is still under review
12. arXiv:2412.06045 [pdf] (eess.IV, cs.CV, cs.LG)
    Dilated Balanced Cross Entropy Loss for Medical Image Segmentation
    Authors: Seyed Mohsen Hosseini, Mahdieh Soleymani Baghshah
    Abstract: A novel method for tackling the problem of imbalanced data in medical image segmentation is proposed in this work. In balanced cross entropy (CE) loss, which is a type of weighted CE loss, the weight assigned to each class is the inverse of the class frequency. These balancing weights are expected to equalize the effect of each class on the overall loss and prevent the model from being biased towards the majority class. But, as has been shown in previous studies, this method degrades performance by a large margin. Therefore, balanced CE is not a popular loss in medical segmentation tasks, and usually a region-based loss, like the Dice loss, is used to address the class imbalance problem. In the proposed method, the weighting of the cross entropy loss for each class is based on a dilated area of each class mask, and balancing weights are assigned to each class together with its surrounding pixels. The goal of this study is to show that the performance of balanced CE loss can be greatly improved by modifying its weighting strategy. Experiments on different datasets show that the proposed dilated balanced CE (DBCE) loss outperforms the balanced CE loss by a large margin and produces superior results compared to CE loss, with performance similar to that of the combination of Dice and CE loss. This means that a weighted cross entropy loss with the right weighting strategy can be as effective as a region-based loss in handling the problem of class imbalance in medical segmentation tasks.
    Submitted 8 December, 2024; originally announced December 2024.
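    The weighting idea stated in the abstract admits a short sketch: inverse-class-frequency weights are assigned to each class's pixels and to a dilated band around its mask. The dilation radius and the handling of overlapping bands below are illustrative guesses, not the paper's exact recipe.

      import numpy as np
      from scipy.ndimage import binary_dilation

      def dilated_balanced_weights(mask, n_classes, dilation_iters=3):
          # Per-pixel weight map to multiply into the cross entropy loss.
          freqs = np.bincount(mask.ravel(), minlength=n_classes) / mask.size
          weights = np.ones(mask.shape, dtype=np.float64)
          for c in range(n_classes):
              # Dilate the class mask so the class weight also covers a band
              # of surrounding pixels, then apply the inverse-frequency weight.
              region = binary_dilation(mask == c, iterations=dilation_iters)
              w_c = 1.0 / max(float(freqs[c]), 1e-6)
              weights[region] = np.maximum(weights[region], w_c)
          return weights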
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00678">arXiv:2412.00678</a> <span> [<a href="https://arxiv.org/pdf/2412.00678">pdf</a>, <a href="https://arxiv.org/format/2412.00678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> 2DMamba: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jingwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+A+T">Anh Tien Nguyen</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xi Han</a>, <a href="/search/cs?searchtype=author&query=Trinh%2C+V+Q">Vincent Quoc-Huy Trinh</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+H">Hong Qin</a>, <a href="/search/cs?searchtype=author&query=Samaras%2C+D">Dimitris Samaras</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+S">Mahdi S. Hosseini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00678v2-abstract-short" style="display: inline;"> Efficiently modeling large 2D contexts is essential for various fields including Giga-Pixel Whole Slide Imaging (WSI) and remote sensing. Transformer-based models offer high parallelism but face challenges due to their quadratic complexity for handling long sequences. Recently, Mamba introduced a selective State Space Model (SSM) with linear complexity and high parallelism, enabling effective and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00678v2-abstract-full').style.display = 'inline'; document.getElementById('2412.00678v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00678v2-abstract-full" style="display: none;"> Efficiently modeling large 2D contexts is essential for various fields including Giga-Pixel Whole Slide Imaging (WSI) and remote sensing. Transformer-based models offer high parallelism but face challenges due to their quadratic complexity for handling long sequences. Recently, Mamba introduced a selective State Space Model (SSM) with linear complexity and high parallelism, enabling effective and efficient modeling of wide context in 1D sequences. However, extending Mamba to vision tasks, which inherently involve 2D structures, results in spatial discrepancies due to the limitations of 1D sequence processing. On the other hand, current 2D SSMs inherently model 2D structures but they suffer from prohibitively slow computation due to the lack of efficient parallel algorithms. In this work, we propose 2DMamba, a novel 2D selective SSM framework that incorporates the 2D spatial structure of images into Mamba, with a highly optimized hardware-aware operator, adopting both spatial continuity and computational efficiency. We validate the versatility of our approach on both WSIs and natural images. Extensive experiments on 10 public datasets for WSI classification and survival analysis show that 2DMamba improves up to 2.48% in AUC, 3.11% in F1 score, 2.47% in accuracy and 5.52% in C-index. 
Additionally, integrating our method with VMamba for natural imaging yields 0.5 to 0.7 improvements in mIoU on the ADE20k semantic segmentation dataset, and 0.2% accuracy improvement on ImageNet-1K classification dataset. Our code is available at https://github.com/AtlasAnalyticsLab/2DMamba. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00678v2-abstract-full').style.display = 'none'; document.getElementById('2412.00678v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in CVPR 2025 Main Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15719">arXiv:2411.15719</a> <span> [<a href="https://arxiv.org/pdf/2411.15719">pdf</a>, <a href="https://arxiv.org/format/2411.15719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Comparative Analysis of Diffusion Generative Models in Computational Pathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thakkar%2C+D">Denisha Thakkar</a>, <a href="/search/cs?searchtype=author&query=Trinh%2C+V+Q">Vincent Quoc-Huy Trinh</a>, <a href="/search/cs?searchtype=author&query=Varma%2C+S">Sonal Varma</a>, <a href="/search/cs?searchtype=author&query=Kahou%2C+S+E">Samira Ebrahimi Kahou</a>, <a href="/search/cs?searchtype=author&query=Rivaz%2C+H">Hassan Rivaz</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+S">Mahdi S. Hosseini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15719v1-abstract-short" style="display: inline;"> Diffusion Generative Models (DGM) have rapidly surfaced as emerging topics in the field of computer vision, garnering significant interest across a wide array of deep learning applications. Despite their high computational demand, these models are extensively utilized for their superior sample quality and robust mode coverage. While research in diffusion generative models is advancing, exploration… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15719v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15719v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15719v1-abstract-full" style="display: none;"> Diffusion Generative Models (DGM) have rapidly surfaced as emerging topics in the field of computer vision, garnering significant interest across a wide array of deep learning applications. 
Despite their high computational demand, these models are extensively utilized for their superior sample quality and robust mode coverage. While research in diffusion generative models is advancing, exploration within the domain of computational pathology and its large-scale datasets has been comparatively gradual. Bridging the gap between the high-quality generation capabilities of Diffusion Generative Models and the intricate nature of pathology data, this paper presents an in-depth comparative analysis of diffusion methods applied to a pathology dataset. Our analysis extends to datasets with varying Fields of View (FOV), revealing that DGMs are highly effective in producing high-quality synthetic data. An ablative study is also conducted, followed by a detailed discussion on the impact of various methods on the synthesized histopathology images. One striking observation from our experiments is how the adjustment of image size during data generation can simulate varying fields of view. These findings underscore the potential of DGMs to enhance the quality and diversity of synthetic pathology data, especially when used with real data, ultimately increasing accuracy of deep learning models in histopathology. Code is available from https://github.com/AtlasAnalyticsLab/Diffusion4Path <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15719v1-abstract-full').style.display = 'none'; document.getElementById('2411.15719v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted paper under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08054">arXiv:2411.08054</a> <span> [<a href="https://arxiv.org/pdf/2411.08054">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GREI Data Repository AI Taxonomy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chodacki%2C+J">John Chodacki</a>, <a href="/search/cs?searchtype=author&query=Hanhel%2C+M">Mark Hanhel</a>, <a href="/search/cs?searchtype=author&query=Iacus%2C+S">Stefano Iacus</a>, <a href="/search/cs?searchtype=author&query=Scherle%2C+R">Ryan Scherle</a>, <a href="/search/cs?searchtype=author&query=Olson%2C+E">Eric Olson</a>, <a href="/search/cs?searchtype=author&query=Pfeiffer%2C+N">Nici Pfeiffer</a>, <a href="/search/cs?searchtype=author&query=Holmes%2C+K">Kristi Holmes</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M">Mohammad Hosseini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08054v1-abstract-short" style="display: inline;"> The Generalist Repository 
Ecosystem Initiative (GREI), funded by the NIH, developed an AI taxonomy tailored to data repository roles to guide AI integration across repository management. It categorizes the roles into stages, including acquisition, validation, organization, enhancement, analysis, sharing, and user support, providing a structured framework for implementing AI in repository workflows… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08054v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08054v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08054v1-abstract-full" style="display: none;"> The Generalist Repository Ecosystem Initiative (GREI), funded by the NIH, developed an AI taxonomy tailored to data repository roles to guide AI integration across repository management. It categorizes the roles into stages, including acquisition, validation, organization, enhancement, analysis, sharing, and user support, providing a structured framework for implementing AI in repository workflows. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08054v1-abstract-full').style.display = 'none'; document.getElementById('2411.08054v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05959">arXiv:2411.05959</a> <span> [<a href="https://arxiv.org/pdf/2411.05959">pdf</a>, <a href="https://arxiv.org/format/2411.05959">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Self-Supervised Barlow Twins from Limited Tissue Slide Cohorts for Colonic Pathology Diagnostics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Notton%2C+C">Cassandre Notton</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+V">Vasudev Sharma</a>, <a href="/search/cs?searchtype=author&query=Trinh%2C+V+Q">Vincent Quoc-Huy Trinh</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lina Chen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Minqi Xu</a>, <a href="/search/cs?searchtype=author&query=Varma%2C+S">Sonal Varma</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+S">Mahdi S. Hosseini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05959v1-abstract-short" style="display: inline;"> Colorectal cancer (CRC) is one of the few cancers that have an established dysplasia-carcinoma sequence that benefits from screening. Everyone over 50 years of age in Canada is eligible for CRC screening. About 20\% of those people will undergo a biopsy for a pre-neoplastic polyp and, in many cases, multiple polyps. As such, these polyp biopsies make up the bulk of a pathologist's workload. 
Develo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05959v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05959v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05959v1-abstract-full" style="display: none;"> Colorectal cancer (CRC) is one of the few cancers that have an established dysplasia-carcinoma sequence that benefits from screening. Everyone over 50 years of age in Canada is eligible for CRC screening. About 20\% of those people will undergo a biopsy for a pre-neoplastic polyp and, in many cases, multiple polyps. As such, these polyp biopsies make up the bulk of a pathologist's workload. Developing an efficient computational model to help screen these polyp biopsies can improve the pathologist's workflow and help guide their attention to critical areas on the slide. DL models face significant challenges in computational pathology (CPath) because of the gigapixel image size of whole-slide images and the scarcity of detailed annotated datasets. It is, therefore, crucial to leverage self-supervised learning (SSL) methods to alleviate the burden and cost of data annotation. However, current research lacks methods to apply SSL frameworks to analyze pathology data effectively. This paper aims to propose an optimized Barlow Twins framework for colorectal polyps screening. We adapt its hyperparameters, augmentation strategy and encoder to the specificity of the pathology data to enhance performance. Additionally, we investigate the best Field of View (FoV) for colorectal polyps screening and propose a new benchmark dataset for CRC screening, made of four types of colorectal polyps and normal tissue, by performing downstream tasks on the MHIST and NCT-CRC-7K datasets. Furthermore, we show that the SSL representations are more meaningful and qualitative than the supervised ones and that Barlow Twins benefits from the Swin Transformer when applied to pathology data. Code is available from https://github.com/AtlasAnalyticsLab/PathBT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05959v1-abstract-full').style.display = 'none'; document.getElementById('2411.05959v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024.
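For orientation, the standard Barlow Twins objective this work builds on is sketched below; the paper's contribution lies in adapting hyperparameters, augmentations, and the encoder to pathology data, which this generic sketch does not capture.

```python
import torch

def barlow_twins_loss(z1, z2, lambda_offdiag=5e-3):
    """z1, z2: (batch, dim) embeddings of two augmented views of the same
    batch. Pushes the cross-correlation matrix towards the identity:
    diagonal -> 1 (invariance), off-diagonal -> 0 (redundancy reduction)."""
    n = z1.shape[0]
    z1 = (z1 - z1.mean(0)) / z1.std(0)
    z2 = (z2 - z2.mean(0)) / z2.std(0)
    c = (z1.T @ z2) / n  # (dim, dim) cross-correlation matrix
    on_diag = (torch.diagonal(c) - 1).pow(2).sum()
    off_diag = (c - torch.diag(torch.diagonal(c))).pow(2).sum()
    return on_diag + lambda_offdiag * off_diag
```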
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submission Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22506">arXiv:2410.22506</a> <span> [<a href="https://arxiv.org/pdf/2410.22506">pdf</a>, <a href="https://arxiv.org/format/2410.22506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AffectNet+: A Database for Enhancing Facial Expression Recognition with Soft-Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fard%2C+A+P">Ali Pourramezan Fard</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+M">Mohammad Mehdi Hosseini</a>, <a href="/search/cs?searchtype=author&query=Sweeny%2C+T+D">Timothy D. Sweeny</a>, <a href="/search/cs?searchtype=author&query=Mahoor%2C+M+H">Mohammad H. Mahoor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22506v1-abstract-short" style="display: inline;"> Automated Facial Expression Recognition (FER) is challenging due to intra-class variations and inter-class similarities. FER can be especially difficult when facial expressions reflect a mixture of various emotions (aka compound expressions). Existing FER datasets, such as AffectNet, provide discrete emotion labels (hard-labels), where a single category of emotion is assigned to an expression. To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22506v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22506v1-abstract-full" style="display: none;"> Automated Facial Expression Recognition (FER) is challenging due to intra-class variations and inter-class similarities. FER can be especially difficult when facial expressions reflect a mixture of various emotions (aka compound expressions). Existing FER datasets, such as AffectNet, provide discrete emotion labels (hard-labels), where a single category of emotion is assigned to an expression. To alleviate inter- and intra-class challenges, as well as provide a better facial expression descriptor, we propose a new approach to create FER datasets through a labeling method in which an image is labeled with more than one emotion (called soft-labels), each with different confidences. Specifically, we introduce the notion of soft-labels for facial expression datasets, a new approach to affective computing for more realistic recognition of facial expressions. To achieve this goal, we propose a novel methodology to accurately calculate soft-labels: a vector representing the extent to which multiple categories of emotion are simultaneously present within a single facial expression. Finding smoother decision boundaries, enabling multi-labeling, and mitigating bias and imbalanced data are some of the advantages of our proposed method. Building upon AffectNet, we introduce AffectNet+, the next-generation facial expression dataset. 
This dataset contains soft-labels, three categories of data complexity subsets, and additional metadata such as age, gender, ethnicity, head pose, facial landmarks, valence, and arousal. AffectNet+ will be made publicly accessible to researchers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22506v1-abstract-full').style.display = 'none'; document.getElementById('2410.22506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22092">arXiv:2410.22092</a> <span> [<a href="https://arxiv.org/pdf/2410.22092">pdf</a>, <a href="https://arxiv.org/format/2410.22092">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Towards Data-Informed Interventions: Opportunities and Challenges of Street-level Multimodal Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rulff%2C+J">Joao Rulff</a>, <a href="/search/cs?searchtype=author&query=Pereira%2C+G">Giancarlo Pereira</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M">Maryam Hosseini</a>, <a href="/search/cs?searchtype=author&query=Lage%2C+M">Marcos Lage</a>, <a href="/search/cs?searchtype=author&query=Silva%2C+C">Claudio Silva</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22092v1-abstract-short" style="display: inline;"> Over the past decades, improvements in data collection hardware coupled with novel artificial intelligence algorithms have made it possible for researchers to understand urban environments at an unprecedented scale. From local interactions between actors to city-wide infrastructural problems, this new data-driven approach enables a more informed and trustworthy decision-making process aiming at tr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22092v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22092v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22092v1-abstract-full" style="display: none;"> Over the past decades, improvements in data collection hardware coupled with novel artificial intelligence algorithms have made it possible for researchers to understand urban environments at an unprecedented scale. From local interactions between actors to city-wide infrastructural problems, this new data-driven approach enables a more informed and trustworthy decision-making process aiming at transforming cities into safer and more equitable places for living. This new moment unfolded new opportunities to understand various phenomena that directly impact how accessible cities are to heterogeneous populations. Specifically, sensing localized physical interactions among actors under different scenarios can drive substantial interventions in urban environments to make them safer for all. 
In this manuscript, we list opportunities and associated challenges to leverage street-level multimodal sensing data to empower domain experts in making more informed decisions and, ultimately, supporting a data-informed policymaking framework. The challenges presented here can motivate research in different areas, such as computer vision and human-computer interaction, to support cities in growing more sustainably. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22092v1-abstract-full').style.display = 'none'; document.getElementById('2410.22092v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ASSETS 2024 UrbanAccess Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12053">arXiv:2409.12053</a> <span> [<a href="https://arxiv.org/pdf/2409.12053">pdf</a>, <a href="https://arxiv.org/format/2409.12053">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> </div> </div> <p class="title is-5 mathjax"> Extended Deep Submodular Functions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hosseini%2C+S+M">Seyed Mohammad Hosseini</a>, <a href="/search/cs?searchtype=author&query=Jamshid%2C+A">Arash Jamshid</a>, <a href="/search/cs?searchtype=author&query=Noormousavi%2C+S+M">Seyed Mahdi Noormousavi</a>, <a href="/search/cs?searchtype=author&query=Siavoshani%2C+M+J">Mahdi Jafari Siavoshani</a>, <a href="/search/cs?searchtype=author&query=Omidvar%2C+N">Naeimeh Omidvar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12053v1-abstract-short" style="display: inline;"> We introduce a novel category of set functions called Extended Deep Submodular functions (EDSFs), which are neural network-representable. EDSFs serve as an extension of Deep Submodular Functions (DSFs), inheriting crucial properties from DSFs while addressing innate limitations. It is known that DSFs can represent a limiting subset of submodular functions. In contrast, through an analysis of polym… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12053v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12053v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12053v1-abstract-full" style="display: none;"> We introduce a novel category of set functions called Extended Deep Submodular functions (EDSFs), which are neural network-representable. EDSFs serve as an extension of Deep Submodular Functions (DSFs), inheriting crucial properties from DSFs while addressing innate limitations. It is known that DSFs can represent a limiting subset of submodular functions. 
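For reference, the two defining properties at stake here (monotonicity and diminishing returns) can be checked by brute force on small ground sets. This generic check is ours, purely illustrative; the paper's results concern which of these functions neural-network families such as DSFs and EDSFs can represent.

```python
from itertools import chain, combinations

def is_monotone_submodular(f, ground, tol=1e-12):
    """Brute-force check that f is monotone and satisfies diminishing returns:
    f(A + x) - f(A) >= f(B + x) - f(B) for all A subset of B, x not in B."""
    subsets = [frozenset(s) for s in
               chain.from_iterable(combinations(ground, r)
                                   for r in range(len(ground) + 1))]
    for A in subsets:
        for B in subsets:
            if not A <= B:
                continue
            if f(A) > f(B) + tol:  # monotonicity
                return False
            for x in ground - B:   # diminishing returns
                if f(A | {x}) - f(A) < f(B | {x}) - f(B) - tol:
                    return False
    return True

# Coverage functions (the class used in the paper's learning experiments) pass:
cover = {1: {"a", "b"}, 2: {"b", "c"}, 3: {"c", "d"}}
f = lambda S: len(set().union(*(cover[i] for i in S))) if S else 0
print(is_monotone_submodular(f, frozenset(cover)))  # True
```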
In contrast, through an analysis of polymatroid properties, we establish that EDSFs possess the capability to represent all monotone submodular functions, a notable enhancement compared to DSFs. Furthermore, our findings demonstrate that EDSFs can represent any monotone set function, indicating the family of EDSFs is equivalent to the family of all monotone set functions. Additionally, we prove that EDSFs maintain the concavity inherent in DSFs when the components of the input vector are non-negative real numbers-an essential feature in certain combinatorial optimization problems. Through extensive experiments, we illustrate that EDSFs exhibit significantly lower empirical generalization error than DSFs in the learning of coverage functions. This suggests that EDSFs present a promising advancement in the representation and learning of set functions with improved generalization capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12053v1-abstract-full').style.display = 'none'; document.getElementById('2409.12053v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06139">arXiv:2408.06139</a> <span> [<a href="https://arxiv.org/pdf/2408.06139">pdf</a>, <a href="https://arxiv.org/format/2408.06139">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVCG.2024.3456353">10.1109/TVCG.2024.3456353 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Curio: A Dataflow-Based Framework for Collaborative Urban Visual Analytics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Moreira%2C+G">Gustavo Moreira</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M">Maryam Hosseini</a>, <a href="/search/cs?searchtype=author&query=Veiga%2C+C">Carolina Veiga</a>, <a href="/search/cs?searchtype=author&query=Alexandre%2C+L">Lucas Alexandre</a>, <a href="/search/cs?searchtype=author&query=Colaninno%2C+N">Nicola Colaninno</a>, <a href="/search/cs?searchtype=author&query=de+Oliveira%2C+D">Daniel de Oliveira</a>, <a href="/search/cs?searchtype=author&query=Ferreira%2C+N">Nivan Ferreira</a>, <a href="/search/cs?searchtype=author&query=Lage%2C+M">Marcos Lage</a>, <a href="/search/cs?searchtype=author&query=Miranda%2C+F">Fabio Miranda</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06139v1-abstract-short" style="display: inline;"> Over the past decade, several urban visual analytics systems and tools have been proposed to tackle a host of challenges faced by cities, in areas as diverse as transportation, 
weather, and real estate. Many of these tools have been designed through collaborations with urban experts, aiming to distill intricate urban analysis workflows into interactive visualizations and interfaces. However, the d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06139v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06139v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06139v1-abstract-full" style="display: none;"> Over the past decade, several urban visual analytics systems and tools have been proposed to tackle a host of challenges faced by cities, in areas as diverse as transportation, weather, and real estate. Many of these tools have been designed through collaborations with urban experts, aiming to distill intricate urban analysis workflows into interactive visualizations and interfaces. However, the design, implementation, and practical use of these tools still rely on siloed approaches, resulting in bespoke applications that are difficult to reproduce and extend. At the design level, these tools undervalue rich data workflows from urban experts, typically treating them only as data providers and evaluators. At the implementation level, they lack interoperability with other technical frameworks. At the practical use level, they tend to be narrowly focused on specific fields, inadvertently creating barriers to cross-domain collaboration. To address these gaps, we present Curio, a framework for collaborative urban visual analytics. Curio uses a dataflow model with multiple abstraction levels (code, grammar, GUI elements) to facilitate collaboration across the design and implementation of visual analytics components. The framework allows experts to intertwine data preprocessing, management, and visualization stages while tracking the provenance of code and visualizations. In collaboration with urban experts, we evaluate Curio through a diverse set of usage scenarios targeting urban accessibility, urban microclimate, and sunlight access. These scenarios use different types of data and domain methodologies to illustrate Curio's flexibility in tackling pressing societal challenges. Curio is available at https://urbantk.org/curio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06139v1-abstract-full').style.display = 'none'; document.getElementById('2408.06139v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IEEE VIS 2024. 
Source code available at https://urbantk.org/curio</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03395">arXiv:2408.03395</a> <span> [<a href="https://arxiv.org/pdf/2408.03395">pdf</a>, <a href="https://arxiv.org/format/2408.03395">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> An Analysis of Automated Use Case Component Extraction from Scenarios using ChatGPT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=KC%2C+P">Pragyan KC</a>, <a href="/search/cs?searchtype=author&query=Slavin%2C+R">Rocky Slavin</a>, <a href="/search/cs?searchtype=author&query=Ghanavati%2C+S">Sepideh Ghanavati</a>, <a href="/search/cs?searchtype=author&query=Breaux%2C+T">Travis Breaux</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+B">Mitra Bokaei Hosseini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03395v1-abstract-short" style="display: inline;"> Mobile applications (apps) are often developed by only a small number of developers with limited resources, especially in the early years of the app's development. In this setting, many requirements acquisition activities, such as interviews, are challenging or lower priority than development and release activities. Moreover, in this early period, requirements are frequently changing as mobile app… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03395v1-abstract-full').style.display = 'inline'; document.getElementById('2408.03395v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03395v1-abstract-full" style="display: none;"> Mobile applications (apps) are often developed by only a small number of developers with limited resources, especially in the early years of the app's development. In this setting, many requirements acquisition activities, such as interviews, are challenging or lower priority than development and release activities. Moreover, in this early period, requirements are frequently changing as mobile apps evolve to compete in the marketplace. As app development companies move to standardize their development processes, however, they will shift to documenting and analyzing requirements. One low-cost source of requirements post-deployment is user-authored scenarios describing how users interact with an app. We propose a method for extracting use case components from user-authored scenarios using large language models (LLMs). The method consists of a series of prompts that were developed to improve precision and recall on a ground truth dataset of 50 scenarios independently labeled with UC components. Our results reveal that LLMs require additional domain knowledge to extract UC components, and that refining prompts to include this knowledge improves the quality of the extracted UC components.
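A hypothetical single-prompt version of this idea is sketched below; the paper itself develops a refined series of prompts with ChatGPT, and the model name, component schema, and helper names here are illustrative assumptions of ours.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

TEMPLATE = """Extract the use case components from the scenario below.
Return JSON with keys: actor, precondition, trigger, main_flow, postcondition.

Scenario: {scenario}"""

def extract_uc_components(scenario: str) -> str:
    # Single round-trip extraction; the paper's prompt series adds domain
    # knowledge iteratively to improve precision and recall.
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": TEMPLATE.format(scenario=scenario)}],
    )
    return resp.choices[0].message.content
```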
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03395v1-abstract-full').style.display = 'none'; document.getElementById('2408.03395v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09592">arXiv:2407.09592</a> <span> [<a href="https://arxiv.org/pdf/2407.09592">pdf</a>, <a href="https://arxiv.org/format/2407.09592">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Toward Regulatory Compliance: A few-shot Learning Approach to Extract Processing Activities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=KC%2C+P">Pragyan KC</a>, <a href="/search/cs?searchtype=author&query=Ghandiparsi%2C+R">Rambod Ghandiparsi</a>, <a href="/search/cs?searchtype=author&query=Slavin%2C+R">Rocky Slavin</a>, <a href="/search/cs?searchtype=author&query=Ghanavati%2C+S">Sepideh Ghanavati</a>, <a href="/search/cs?searchtype=author&query=Breaux%2C+T">Travis Breaux</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+B">Mitra Bokaei Hosseini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09592v1-abstract-short" style="display: inline;"> The widespread use of mobile applications has driven the growth of the industry, with companies relying heavily on user data for services like targeted advertising and personalized offerings. In this context, privacy regulations such as the General Data Protection Regulation (GDPR) play a crucial role. One of the GDPR requirements is the maintenance of a Record of Processing Activities (RoPA) by… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09592v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09592v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09592v1-abstract-full" style="display: none;"> The widespread use of mobile applications has driven the growth of the industry, with companies relying heavily on user data for services like targeted advertising and personalized offerings. In this context, privacy regulations such as the General Data Protection Regulation (GDPR) play a crucial role. One of the GDPR requirements is the maintenance of a Record of Processing Activities (RoPA) by companies. RoPA encompasses various details, including the description of data processing activities, their purposes, types of data involved, and other relevant external entities. Small app-developing companies face challenges in meeting such compliance requirements due to resource limitations and tight timelines. To aid these developers and prevent fines, we propose a method to generate segments of RoPA from user-authored usage scenarios using large language models (LLMs). Our method employs few-shot learning with GPT-3.5 Turbo to summarize usage scenarios and generate RoPA segments. 
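A sketch of k-shot prompt assembly for this summarization step; the number, order, and repetition of examples are exactly the factors the study varies. Field names and formatting below are our assumptions.

```python
def build_ropa_prompt(examples, scenario, k=3):
    """Assemble a k-shot prompt: k (scenario, summary) pairs, then the query."""
    shots = "\n\n".join(
        f"Scenario: {ex['scenario']}\nProcessing activities: {ex['summary']}"
        for ex in examples[:k]
    )
    return f"{shots}\n\nScenario: {scenario}\nProcessing activities:"
```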
We evaluate different factors that can affect few-shot learning performance consistency for our summarization task, including the number of examples in few-shot learning prompts, repetition, and order permutation of examples in the prompts. Our findings highlight the significant influence of the number of examples in prompts on summarization F1 scores, while demonstrating negligible variability in F1 scores across multiple prompt repetitions. Our prompts achieve successful summarization of processing activities with an average 70% ROUGE-L F1 score. Finally, we discuss avenues for improving results through manual evaluation of the generated summaries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09592v1-abstract-full').style.display = 'none'; document.getElementById('2407.09592v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in the the 11th International Workshop on Evolving Security & Privacy Requirements Engineering (ESPRE)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06464">arXiv:2407.06464</a> <span> [<a href="https://arxiv.org/pdf/2407.06464">pdf</a>, <a href="https://arxiv.org/format/2407.06464">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> SideSeeing: A multimodal dataset and collection of tools for sidewalk assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Damaceno%2C+R+J+P">R. J. P. Damaceno</a>, <a href="/search/cs?searchtype=author&query=Ferreira%2C+L">L. Ferreira</a>, <a href="/search/cs?searchtype=author&query=Miranda%2C+F">F. Miranda</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M">M. Hosseini</a>, <a href="/search/cs?searchtype=author&query=Cesar%2C+R+M">R. M. Cesar Jr</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06464v2-abstract-short" style="display: inline;"> This paper introduces SideSeeing, a novel initiative that provides tools and datasets for assessing the built environment. We present a framework for street-level data acquisition, loading, and analysis. 
Using the framework, we collected a novel dataset that integrates synchronized video footage captured from chest-mounted mobile devices with sensor data (accelerometer, gyroscope, magnetometer, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06464v2-abstract-full').style.display = 'inline'; document.getElementById('2407.06464v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06464v2-abstract-full" style="display: none;"> This paper introduces SideSeeing, a novel initiative that provides tools and datasets for assessing the built environment. We present a framework for street-level data acquisition, loading, and analysis. Using the framework, we collected a novel dataset that integrates synchronized video footage captured from chest-mounted mobile devices with sensor data (accelerometer, gyroscope, magnetometer, and GPS). Each data sample represents a path traversed by a user filming sidewalks near hospitals in Brazil and the USA. The dataset encompasses three hours of content covering 12 kilometers around nine hospitals, and includes 325,000 video frames with corresponding sensor data. Additionally, we present a novel 68-element taxonomy specifically created for sidewalk scene identification. SideSeeing is a step towards a suite of tools that urban experts can use to perform in-depth sidewalk accessibility evaluations. SideSeeing data and tools are publicly available at https://sites.usp.br/sideseeing/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06464v2-abstract-full').style.display = 'none'; document.getElementById('2407.06464v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03552">arXiv:2407.03552</a> <span> [<a href="https://arxiv.org/pdf/2407.03552">pdf</a>, <a href="https://arxiv.org/format/2407.03552">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Vision Mamba for Classification of Breast Ultrasound Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nasiri-Sarvi%2C+A">Ali Nasiri-Sarvi</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+S">Mahdi S. Hosseini</a>, <a href="/search/cs?searchtype=author&query=Rivaz%2C+H">Hassan Rivaz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03552v2-abstract-short" style="display: inline;"> Mamba-based models, VMamba and Vim, are a recent family of vision encoders that offer promising performance improvements in many computer vision tasks.
This paper compares Mamba-based models with traditional Convolutional Neural Networks (CNNs) and Vision Transformers (ViTs) using the breast ultrasound BUSI dataset and Breast Ultrasound B dataset. Our evaluation, which includes multiple runs of ex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03552v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03552v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03552v2-abstract-full" style="display: none;"> Mamba-based models, VMamba and Vim, are a recent family of vision encoders that offer promising performance improvements in many computer vision tasks. This paper compares Mamba-based models with traditional Convolutional Neural Networks (CNNs) and Vision Transformers (ViTs) using the breast ultrasound BUSI dataset and Breast Ultrasound B dataset. Our evaluation, which includes multiple runs of experiments and statistical significance analysis, demonstrates that some of the Mamba-based architectures often outperform CNN and ViT models with statistically significant results. For example, in the B dataset, the best Mamba-based models have a 1.98\% average AUC and a 5.0\% average Accuracy improvement compared to the best non-Mamba-based model in this study. These Mamba-based models effectively capture long-range dependencies while maintaining some inductive biases, making them suitable for applications with limited data. The code is available at \url{https://github.com/anasiri/BU-Mamba} <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03552v2-abstract-full').style.display = 'none'; document.getElementById('2407.03552v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in MICCAI 2024 Deep-Breath workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19803">arXiv:2406.19803</a> <span> [<a href="https://arxiv.org/pdf/2406.19803">pdf</a>, <a href="https://arxiv.org/format/2406.19803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Scalable and Domain-General Abstractive Proposition Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+J">Mohammad Javad Hosseini</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yang Gao</a>, <a href="/search/cs?searchtype=author&query=Baumg%C3%A4rtner%2C+T">Tim Baumgärtner</a>, <a href="/search/cs?searchtype=author&query=Fabrikant%2C+A">Alex Fabrikant</a>, <a href="/search/cs?searchtype=author&query=Amplayo%2C+R+K">Reinald Kim Amplayo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19803v2-abstract-short" style="display: inline;"> Segmenting text into fine-grained units of meaning is important to a wide range of NLP applications. The default approach of segmenting text into sentences is often insufficient, especially since sentences are usually complex enough to include multiple units of meaning that merit separate treatment in the downstream task. We focus on the task of abstractive proposition segmentation (APS): transfor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19803v2-abstract-full').style.display = 'inline'; document.getElementById('2406.19803v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19803v2-abstract-full" style="display: none;"> Segmenting text into fine-grained units of meaning is important to a wide range of NLP applications. The default approach of segmenting text into sentences is often insufficient, especially since sentences are usually complex enough to include multiple units of meaning that merit separate treatment in the downstream task. We focus on the task of abstractive proposition segmentation (APS): transforming text into simple, self-contained, well-formed sentences. Several recent works have demonstrated the utility of proposition segmentation with few-shot prompted LLMs for downstream tasks such as retrieval-augmented grounding and fact verification. However, this approach does not scale to large amounts of text and may not always extract all the facts from the input text. In this paper, we first introduce evaluation metrics for the task to measure several dimensions of quality. We then propose a scalable, yet accurate, proposition segmentation model. We model proposition segmentation as a supervised task by training LLMs on existing annotated datasets and show that training yields significantly improved results.
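To make the task concrete, an illustrative input/output pair (our example, not drawn from the paper's data):

```python
# One complex sentence decomposed into simple, self-contained,
# well-formed propositions, as the APS task definition requires.
aps_example = {
    "text": "Marie Curie, who won two Nobel Prizes, was born in Warsaw.",
    "propositions": [
        "Marie Curie won two Nobel Prizes.",
        "Marie Curie was born in Warsaw.",
    ],
}
```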
We further show that by using the fine-tuned LLMs (Gemini Pro and Gemini Ultra) as teachers for annotating large amounts of multi-domain synthetic distillation data, we can train smaller student models (Gemma 1 2B and 7B) with results similar to the teacher LLMs. We then demonstrate that our technique leads to effective domain generalization, by annotating data in two domains outside the original training data and evaluating on them. Finally, as a key contribution of the paper, we share an easy-to-use API for NLP practitioners to use. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19803v2-abstract-full').style.display = 'none'; document.getElementById('2406.19803v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01551">arXiv:2406.01551</a> <span> [<a href="https://arxiv.org/pdf/2406.01551">pdf</a>, <a href="https://arxiv.org/format/2406.01551">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ELSA: Evaluating Localization of Social Activities in Urban Streets using Open-Vocabulary Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hosseini%2C+M">Maryam Hosseini</a>, <a href="/search/cs?searchtype=author&query=Cipriano%2C+M">Marco Cipriano</a>, <a href="/search/cs?searchtype=author&query=Eslami%2C+S">Sedigheh Eslami</a>, <a href="/search/cs?searchtype=author&query=Hodczak%2C+D">Daniel Hodczak</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Liu Liu</a>, <a href="/search/cs?searchtype=author&query=Sevtsuk%2C+A">Andres Sevtsuk</a>, <a href="/search/cs?searchtype=author&query=de+Melo%2C+G">Gerard de Melo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01551v2-abstract-short" style="display: inline;"> Existing Open Vocabulary Detection (OVD) models exhibit a number of challenges. They often struggle with semantic consistency across diverse inputs, and are often sensitive to slight variations in input phrasing, leading to inconsistent performance. The calibration of their predictive confidence, especially in complex multi-label scenarios, remains suboptimal, frequently resulting in overconfident… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01551v2-abstract-full').style.display = 'inline'; document.getElementById('2406.01551v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01551v2-abstract-full" style="display: none;"> Existing Open Vocabulary Detection (OVD) models exhibit a number of challenges. They often struggle with semantic consistency across diverse inputs, and are often sensitive to slight variations in input phrasing, leading to inconsistent performance. 
The calibration of their predictive confidence, especially in complex multi-label scenarios, remains suboptimal, frequently resulting in overconfident predictions that do not accurately reflect their context understanding. To understand these limitations, multi-label detection benchmarks are needed. A particularly challenging domain for such benchmarking is social activities. Due to the lack of multi-label benchmarks for social interactions, in this work we present ELSA: Evaluating Localization of Social Activities. ELSA draws on theoretical frameworks in urban sociology and design and uses in-the-wild street-level imagery, where the size of groups and the types of activities vary significantly. ELSA includes more than 900 manually annotated images with more than 4,300 multi-labeled bounding boxes for individual and group activities. We introduce a novel confidence score computation method NLSE and a novel Dynamic Box Aggregation (DBA) algorithm to assess semantic consistency in overlapping predictions. We report our results on the widely-used SOTA models Grounding DINO, Detic, OWL, and MDETR. Our evaluation protocol considers semantic stability and localization accuracy and further exposes the limitations of existing approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01551v2-abstract-full').style.display = 'none'; document.getElementById('2406.01551v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18942">arXiv:2405.18942</a> <span> [<a href="https://arxiv.org/pdf/2405.18942">pdf</a>, <a href="https://arxiv.org/format/2405.18942">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Verifiably Robust Conformal Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jeary%2C+L">Linus Jeary</a>, <a href="/search/cs?searchtype=author&query=Kuipers%2C+T">Tom Kuipers</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M">Mehran Hosseini</a>, <a href="/search/cs?searchtype=author&query=Paoletti%2C+N">Nicola Paoletti</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18942v3-abstract-short" style="display: inline;"> Conformal Prediction (CP) is a popular uncertainty quantification method that provides distribution-free, statistically valid prediction sets, assuming that training and test data are exchangeable. In such a case, CP's prediction sets are guaranteed to cover the (unknown) true test output with a user-specified probability. 
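The coverage guarantee just stated can be seen in a minimal split conformal regression sketch (ours; VRCP's verification-based robustification is not shown, and function and argument names are illustrative):

```python
import numpy as np

def conformal_interval(cal_pred, cal_true, test_pred, alpha=0.1):
    """Calibrate absolute residuals on held-out data, then return intervals
    covering the true output with probability >= 1 - alpha, assuming
    calibration and test data are exchangeable."""
    scores = np.abs(cal_true - cal_pred)
    n = len(scores)
    level = min(np.ceil((n + 1) * (1 - alpha)) / n, 1.0)
    q = np.quantile(scores, level, method="higher")
    return test_pred - q, test_pred + q
```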
arXiv:2405.18942 (https://arxiv.org/abs/2405.18942) [cs.LO, cs.AI, cs.LG]
Title: Verifiably Robust Conformal Prediction
Authors: Linus Jeary, Tom Kuipers, Mehran Hosseini, Nicola Paoletti
Abstract: Conformal Prediction (CP) is a popular uncertainty quantification method that provides distribution-free, statistically valid prediction sets, assuming that training and test data are exchangeable. In such a case, CP's prediction sets are guaranteed to cover the (unknown) true test output with a user-specified probability. Nevertheless, this guarantee is violated when the data is subjected to adversarial attacks, which often result in a significant loss of coverage. Recently, several approaches have been put forward to recover CP guarantees in this setting. These approaches leverage variations of randomised smoothing to produce conservative sets which account for the effect of the adversarial perturbations. They are, however, limited in that they only support $\ell^2$-bounded perturbations and classification tasks. This paper introduces VRCP (Verifiably Robust Conformal Prediction), a new framework that leverages recent neural network verification methods to recover coverage guarantees under adversarial attacks. Our VRCP method is the first to support perturbations bounded by arbitrary norms including $\ell^1$, $\ell^2$, and $\ell^\infty$, as well as regression tasks. We evaluate and compare our approach on image classification tasks (CIFAR10, CIFAR100, and TinyImageNet) and regression tasks for deep reinforcement learning environments. In every case, VRCP achieves above nominal coverage and yields significantly more efficient and informative prediction regions than the SotA.
Submitted: 16 November, 2024; v1 submitted 29 May, 2024; originally announced May 2024.
Comments: Accepted at NeurIPS 2024
MSC Class: 68T37 (Primary) 68T27 (Secondary). ACM Class: G.3; I.2.4; F.4.1
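For context, the guarantee the abstract refers to comes from split conformal prediction: calibrate a nonconformity score on held-out data and take a finite-sample-corrected quantile. A minimal regression sketch with synthetic data (the residual score and toy model are illustrative; VRCP's verification-based inflation of the sets is not shown):

    import numpy as np

    rng = np.random.default_rng(0)
    # Calibration scores: absolute residuals of a hypothetical regressor.
    y_cal = rng.normal(size=500)
    y_hat = y_cal + rng.normal(scale=0.3, size=500)
    scores = np.abs(y_cal - y_hat)

    alpha = 0.1  # target miscoverage: intervals should cover with prob. 0.9
    n = len(scores)
    # Finite-sample-corrected quantile of the calibration scores.
    q = np.quantile(scores, np.ceil((n + 1) * (1 - alpha)) / n, method="higher")

    # Prediction interval for a new point with model output y_new_hat.
    y_new_hat = 0.0
    print((y_new_hat - q, y_new_hat + q))

Under exchangeability this interval covers the true output with probability at least 1 - alpha; an adversarial perturbation of the input breaks exchangeability, which is the failure mode VRCP addresses.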
arXiv:2405.16397 (https://arxiv.org/abs/2405.16397) [cs.LG, math.OC]
Title: AdaFisher: Adaptive Second Order Optimization via Fisher Information
Authors: Damien Martins Gomes, Yanlei Zhang, Eugene Belilovsky, Guy Wolf, Mahdi S. Hosseini
Abstract: First-order optimization methods are currently the mainstream in training deep neural networks (DNNs). Optimizers like Adam incorporate limited curvature information by employing diagonal matrix preconditioning of the stochastic gradient during training. Despite their widespread use, second-order optimization algorithms exhibit superior convergence properties compared to their first-order counterparts, e.g. Adam and SGD. However, their practicality in training DNNs is still limited due to increased per-iteration computations compared to first-order methods. We present \emph{AdaFisher}, an adaptive second-order optimizer that leverages a \emph{diagonal block-Kronecker} approximation of the Fisher information matrix for adaptive gradient preconditioning. AdaFisher aims to bridge the gap between enhanced \emph{convergence/generalization} capabilities and computational efficiency in second-order optimization frameworks for training DNNs. Despite the traditionally slow pace of second-order optimizers, we showcase that AdaFisher can be reliably adopted for image classification and language modeling, and stands out for its stability and robustness in hyper-parameter tuning. We demonstrate that AdaFisher \textbf{outperforms the SOTA optimizers} in terms of both accuracy and convergence speed. Code is available from https://github.com/AtlasAnalyticsLab/AdaFisher.
Submitted: 10 March, 2025; v1 submitted 25 May, 2024; originally announced May 2024.
Comments: Accepted in ICLR 2025
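To make "Fisher preconditioning" concrete, here is a drastically simplified sketch that preconditions gradient steps with a running diagonal empirical Fisher (an EMA of squared gradients), in the Adam style. This is a stand-in for intuition only; AdaFisher's actual diagonal block-Kronecker approximation is richer than this:

    import numpy as np

    rng = np.random.default_rng(0)
    w = rng.normal(size=5)          # parameters
    fisher_diag = np.zeros_like(w)  # running diagonal empirical Fisher
    lr, beta, eps = 0.1, 0.9, 1e-8

    def grad(w):
        """Gradient of a toy quadratic loss 0.5 * ||w||^2."""
        return w

    for _ in range(100):
        g = grad(w)
        # EMA of squared gradients approximates the diagonal Fisher.
        fisher_diag = beta * fisher_diag + (1 - beta) * g * g
        # Precondition the step by the (root of the) diagonal Fisher.
        w -= lr * g / (np.sqrt(fisher_diag) + eps)

    print(np.round(w, 2))  # drifts toward 0, the minimiser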
arXiv:2405.10648 (https://arxiv.org/abs/2405.10648) [cs.NI, cs.IT]
Title: Optimal Service Placement, Request Routing and CPU Sizing in Cooperative Mobile Edge Computing Networks for Delay-Sensitive Applications
Authors: Naeimeh Omidvar, Mahdieh Ahmadi, Seyed Mohammad Hosseini
Abstract: We study joint optimization of service placement, request routing, and CPU sizing in a cooperative MEC system. The problem is considered from the perspective of the service provider (SP), which delivers heterogeneous MEC-enabled delay-sensitive services and needs to pay the mobile network operators and the cloud provider for the resources it uses, while earning revenue from the served requests. We formulate the problem of maximizing the SP's total profit, subject to the computation, storage, and communication constraints of each edge node and the end-to-end delay requirements of the services, as a mixed-integer non-convex optimization problem, and prove it to be NP-hard. To tackle the challenges in solving the problem, we first introduce a design trade-off parameter for the different delay requirements of each service, which maintains flexibility in prioritizing them, and transform the original optimization problem using the new delay constraints. Then, by exploiting a hidden convexity, we reformulate the delay constraints into an equivalent form. Next, to handle the challenge of the complicating (integer) variables, we use primal decomposition to split the problem into an equivalent form of master and inner sub-problems over the mixed and real variables, respectively. We then employ a cutting-plane approach to build up adequate representations of the extremal value of the inner problem as a function of the complicating variables, and of the set of values of the complicating variables for which the inner problem is feasible. Finally, we propose a solution strategy based on generalized Benders decomposition and prove its convergence to the optimal solution within a limited number of iterations. Extensive simulation results demonstrate that the proposed scheme significantly outperforms existing mechanisms in terms of the SP's profit, cache hit ratio, running time, and end-to-end delay.
Submitted: 17 May, 2024; originally announced May 2024.
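The cutting-plane step the abstract describes amounts to approximating a convex value function from below by supporting hyperplanes. A toy sketch on a one-dimensional convex function (f here is a stand-in for the inner problem's optimal value, not the paper's formulation):

    import numpy as np

    f = lambda x: x ** 2        # stand-in for the inner problem's value function
    df = lambda x: 2 * x        # its (sub)gradient

    cuts = []                   # each cut: f(x0) + f'(x0) * (x - x0)
    for x0 in [-2.0, 0.5, 1.5]:
        v, s = f(x0), df(x0)
        cuts.append(lambda x, v=v, s=s, x0=x0: v + s * (x - x0))

    model = lambda x: max(c(x) for c in cuts)  # piecewise-linear outer model
    for x in [-1.0, 0.0, 1.0]:
        print(f"x={x:+.1f}  f={f(x):.2f}  cut-model={model(x):.2f}")

Each cut under-estimates f everywhere and is tight at its sample point, so the model tightens as the master problem adds cuts; this is the mechanism generalized Benders decomposition iterates to convergence.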
arXiv:2405.10622 (https://arxiv.org/abs/2405.10622) [cs.GT, cs.IT]
Title: Differentially Private Machine Learning-powered Combinatorial Auction Design
Authors: Arash Jamshidi, Seyed Mohammad Hosseini, Seyed Mahdi Noormousavi, Mahdi Jafari Siavoshani
Abstract: We present a new approach to machine learning-powered combinatorial auctions, which is based on the principles of Differential Privacy. Our methodology guarantees that the auction mechanism is truthful, meaning that rational bidders have the incentive to reveal their true valuation functions. We achieve this by inducing truthfulness in the auction dynamics, ensuring that bidders consistently provide accurate information about their valuation functions. Our method not only ensures truthfulness but also preserves the efficiency of the original auction. This means that if the initial auction outputs an allocation with high social welfare, our modified truthful version of the auction will also achieve high social welfare. We use techniques from Differential Privacy, such as the Exponential Mechanism, to achieve these results. Additionally, we examine the application of differential privacy in auctions across both asymptotic and non-asymptotic regimes.
Submitted: 17 May, 2024; originally announced May 2024.
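The Exponential Mechanism the abstract invokes is standard: sample an outcome with probability proportional to exp(epsilon * utility / (2 * sensitivity)). A minimal sketch with made-up outcomes and scores (the auction-specific utility function is the paper's contribution and is not modeled here):

    import numpy as np

    rng = np.random.default_rng(0)
    outcomes = ["A", "B", "C"]
    utility = np.array([3.0, 2.0, 0.5])   # score of each candidate outcome
    epsilon, sensitivity = 1.0, 1.0

    # Exponential mechanism: higher-utility outcomes are exponentially
    # more likely, with the exponent scaled for differential privacy.
    logits = epsilon * utility / (2 * sensitivity)
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    print(rng.choice(outcomes, p=probs))

The randomness is what buys both privacy and, as exploited here, approximate truthfulness: no single bidder's report can shift the outcome distribution by much.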
arXiv:2404.15976 (https://arxiv.org/abs/2404.15976) [cs.HC, cs.CY, cs.GR] DOI: 10.1111/cgf.15112
Title: The State of the Art in Visual Analytics for 3D Urban Data
Authors: Fabio Miranda, Thomas Ortner, Gustavo Moreira, Maryam Hosseini, Milena Vuckovic, Filip Biljecki, Claudio Silva, Marcos Lage, Nivan Ferreira
Abstract: Urbanization has amplified the importance of three-dimensional structures in urban environments for a wide range of phenomena that are of significant interest to diverse stakeholders. With the growing availability of 3D urban data, numerous studies have focused on developing visual analysis techniques tailored to the unique characteristics of urban environments. However, incorporating the third dimension into visual analytics introduces additional challenges in designing effective visual tools to tackle urban data's diverse complexities. In this paper, we present a survey on visual analytics of 3D urban data. Our work characterizes published works along three main dimensions (why, what, and how), considering use cases, analysis tasks, data, visualizations, and interactions. We provide a fine-grained categorization of published works from visualization journals and conferences, as well as from a myriad of urban domains, including urban planning, architecture, and engineering. By incorporating perspectives from both urban and visualization experts, we identify literature gaps, motivate visualization researchers to understand challenges and opportunities, and indicate future research directions.
Submitted: 24 April, 2024; originally announced April 2024.
Comments: Accepted at EuroVis 2024 (STAR track). Surveyed works available at https://urbantk.org/survey-3d

arXiv:2404.13222 (https://arxiv.org/abs/2404.13222) [eess.IV, cs.CV]
Title: Vim4Path: Self-Supervised Vision Mamba for Histopathology Images
Authors: Ali Nasiri-Sarvi, Vincent Quoc-Huy Trinh, Hassan Rivaz, Mahdi S. Hosseini
Abstract: Representation learning from Gigapixel Whole Slide Images (WSI) poses a significant challenge in computational pathology due to the complicated nature of tissue structures and the scarcity of labeled data. Multi-instance learning (MIL) methods address this challenge by classifying slides from image patches, using feature encoders pretrained with Self-Supervised Learning (SSL) approaches. The performance of both SSL and MIL methods relies on the architecture of the feature encoder. This paper proposes leveraging the Vision Mamba (Vim) architecture, inspired by state space models, within the DINO framework for representation learning in computational pathology. We evaluate the performance of Vim against Vision Transformers (ViT) on the Camelyon16 dataset for both patch-level and slide-level classification. Our findings highlight Vim's enhanced performance compared to ViT, particularly at smaller scales, where Vim achieves an 8.21 increase in ROC AUC for models of similar size. An explainability analysis further highlights Vim's capabilities and reveals that Vim, unlike ViT, uniquely emulates the pathologist workflow. This alignment with human expert analysis highlights Vim's potential in practical diagnostic settings and contributes significantly to developing effective representation-learning algorithms in computational pathology. We release the codes and pretrained weights at \url{https://github.com/AtlasAnalyticsLab/Vim4Path}.
Submitted: 25 May, 2024; v1 submitted 19 April, 2024; originally announced April 2024.
Comments: Accepted in CVPR2024 (9th Workshop on Computer Vision for Microscopy Image Analysis)
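The MIL pipeline the abstract assumes treats a slide as a bag of patch embeddings that are pooled into one slide-level prediction. A bare-bones sketch with a toy encoder in place of a pretrained Vim/ViT (everything here is a stand-in, including the mean-pooling aggregator):

    import numpy as np

    rng = np.random.default_rng(0)

    def encode_patch(patch):
        """Stand-in for a pretrained (e.g. SSL) patch encoder."""
        return patch.mean(axis=(0, 1))  # toy embedding: mean colour

    # A "slide" as a bag of patches (multiple-instance learning).
    slide = [rng.random((16, 16, 3)) for _ in range(10)]
    embeddings = np.stack([encode_patch(p) for p in slide])

    # Simplest MIL aggregation: mean-pool patch embeddings, then score.
    slide_embedding = embeddings.mean(axis=0)
    w, b = rng.normal(size=3), 0.0        # toy linear slide classifier
    score = 1 / (1 + np.exp(-(w @ slide_embedding + b)))
    print(f"slide-level probability: {score:.3f}")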
arXiv:2404.00178 (https://arxiv.org/abs/2404.00178) [math.OC, cs.AI, cs.LG]
Title: Beyond Suspension: A Two-phase Methodology for Concluding Sports Leagues
Authors: Ali Hassanzadeh, Mojtaba Hosseini, John G. Turner
Abstract: Problem definition: Professional sports leagues may be suspended for various reasons, such as the recent COVID-19 pandemic. A critical question the league must address when re-opening is how to appropriately select a subset of the remaining games to conclude the season in a shortened time frame. Academic/practical relevance: Despite the rich literature on scheduling an entire season starting from a blank slate, concluding an existing season is quite different. Our approach attempts to achieve team rankings similar to those that would have resulted had the season been played out in full. Methodology: We propose a data-driven model which exploits predictive and prescriptive analytics to produce a schedule for the remainder of the season comprised of a subset of originally-scheduled games. Our model introduces novel rankings-based objectives within a stochastic optimization model, whose parameters are first estimated using a predictive model. We introduce a deterministic equivalent reformulation along with a tailored Frank-Wolfe algorithm to efficiently solve our problem, as well as a robust counterpart based on min-max regret. Results: We present simulation-based numerical experiments on previous National Basketball Association (NBA) seasons 2004--2019, and show that our models are computationally efficient, outperform a greedy benchmark that approximates a non-rankings-based scheduling policy, and produce interpretable results. Managerial implications: Our data-driven decision-making framework may be used to produce a shortened season with 25-50\% fewer games while still producing an end-of-season ranking similar to that of the full season, had it been played.
Submitted: 29 March, 2024; originally announced April 2024.
Comments: 32 pages, 9 figures
MSC Class: 90B50 (Primary) 90C06; 90C11; 90C90 (Secondary)
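For readers unfamiliar with Frank-Wolfe: it solves constrained problems by repeatedly minimizing a linearization over the feasible set and stepping toward that vertex, so iterates stay feasible without projections. A minimal sketch on a toy problem (the objective and simplex constraint are illustrative, not the paper's scheduling model):

    import numpy as np

    # Frank-Wolfe on a toy problem: minimize ||x - c||^2 over the simplex.
    c = np.array([0.2, 0.5, 0.3])
    x = np.array([1.0, 0.0, 0.0])            # feasible start (a vertex)

    for k in range(200):
        grad = 2 * (x - c)
        s = np.zeros_like(x)
        s[np.argmin(grad)] = 1.0             # linear minimization oracle
        gamma = 2 / (k + 2)                  # standard step-size schedule
        x = (1 - gamma) * x + gamma * s      # convex combination stays feasible

    print(np.round(x, 3))  # approaches c, the optimum inside the simplex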
arXiv:2403.08077 (https://arxiv.org/abs/2403.08077) [cs.CV, cs.AI]
Title: A Multimodal Intermediate Fusion Network with Manifold Learning for Stress Detection
Authors: Morteza Bodaghi, Majid Hosseini, Raju Gottumukkala
Abstract: Multimodal deep learning methods capture synergistic features from multiple modalities and have the potential to improve accuracy for stress detection compared to unimodal methods. However, this accuracy gain typically comes with high computational cost due to the high-dimensional feature spaces, especially for intermediate fusion. Dimensionality reduction is one way to optimize multimodal learning by simplifying data and making the features more amenable to processing and analysis, thereby reducing computational complexity. This paper introduces an intermediate multimodal fusion network with manifold learning-based dimensionality reduction. The multimodal network generates independent representations from biometric signals and facial landmarks through 1D-CNN and 2D-CNN branches. These features are then fused and fed to another 1D-CNN layer, followed by a fully connected dense layer. We compared various dimensionality reduction techniques for different variations of unimodal and multimodal networks. We observe that intermediate-level fusion with the Multi-Dimensional Scaling (MDS) manifold method showed promising results, with an accuracy of 96.00\% in a Leave-One-Subject-Out Cross-Validation (LOSO-CV) paradigm, over other dimensionality reduction methods. MDS had the highest computational cost among the manifold learning methods. However, while outperforming other networks, it reduced the computational cost of the proposed networks by 25\% compared to six well-known conventional feature selection methods used in the preprocessing step.
Submitted: 12 March, 2024; originally announced March 2024.
Comments: This work was accepted to The 3rd International Conference on Computing and Machine Intelligence (ICMI 2024)
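Classical MDS, the dimensionality-reduction method the abstract favours, embeds points so that pairwise distances are preserved; it reduces to an eigendecomposition of the double-centred squared-distance matrix. A compact sketch on random data (the data and target dimension are illustrative):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(20, 10))             # 20 samples, 10-D features

    # Classical MDS: double-centre squared distances, then eigendecompose.
    D2 = ((X[:, None] - X[None, :]) ** 2).sum(-1)
    J = np.eye(20) - np.ones((20, 20)) / 20
    B = -0.5 * J @ D2 @ J
    vals, vecs = np.linalg.eigh(B)
    idx = np.argsort(vals)[::-1][:2]          # keep the top-2 components
    Y = vecs[:, idx] * np.sqrt(np.maximum(vals[idx], 0))
    print(Y.shape)                            # (20, 2) low-dimensional embedding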
arXiv:2403.01643 (https://arxiv.org/abs/2403.01643) [cs.LG, cs.AI, cs.CL, cs.CV]
Title: Cost-Effective Attention Mechanisms for Low Resource Settings: Necessity & Sufficiency of Linear Transformations
Authors: Peyman Hosseini, Mehran Hosseini, Ignacio Castro, Matthew Purver
Abstract: From natural language processing to vision, Scaled Dot Product Attention (SDPA) is the backbone of most modern deep learning applications. Unfortunately, its memory and computational requirements can be prohibitive in low-resource settings. In this paper, we improve its efficiency without sacrificing its versatility. We propose three attention variants where we remove consecutive linear transformations or add a novel one, and evaluate them on a range of standard NLP and vision tasks. Our proposed models are substantially lighter than standard SDPA (and have 25-50% fewer parameters). We show that the performance cost of these changes is negligible relative to the size reduction, and that in one case (Super Attention) we succeed in outperforming SDPA by up to 10% while improving its speed and reducing its parameters by 25%.
Submitted: 16 February, 2025; v1 submitted 3 March, 2024; originally announced March 2024.
MSC Class: 68T07 (Primary) 68T45; 68T50; 68T10; 15A03; 15A04 (Secondary). ACM Class: I.2.6; I.2.7; I.2.10; I.4.0; I.5.0; I.7.0
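As a reference point for "removing linear transformations", here is plain SDPA followed by one cheaper variant that drops the value projection. Which projections the paper actually removes is not stated in the abstract, so the variant below is an assumption in the same spirit, not one of the paper's three:

    import numpy as np

    def sdpa(x, Wq, Wk, Wv):
        """Standard scaled dot-product attention over one sequence."""
        q, k, v = x @ Wq, x @ Wk, x @ Wv
        scores = q @ k.T / np.sqrt(k.shape[-1])
        weights = np.exp(scores - scores.max(-1, keepdims=True))
        weights /= weights.sum(-1, keepdims=True)
        return weights @ v

    rng = np.random.default_rng(0)
    n, d = 4, 8
    x = rng.normal(size=(n, d))
    Wq, Wk, Wv = (rng.normal(size=(d, d)) for _ in range(3))
    out = sdpa(x, Wq, Wk, Wv)

    # Hypothetical lighter variant: attend directly over x (identity value
    # projection), saving d*d parameters per attention head.
    out_light = sdpa(x, Wq, Wk, np.eye(d))
    print(out.shape, out_light.shape)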
arXiv:2402.17169 (https://arxiv.org/abs/2402.17169) [cs.CV, cs.CY] DOI: 10.1109/TBDATA.2024.3382964
Title: Deep Umbra: A Generative Approach for Sunlight Access Computation in Urban Spaces
Authors: Kazi Shahrukh Omar, Gustavo Moreira, Daniel Hodczak, Maryam Hosseini, Nicola Colaninno, Marcos Lage, Fabio Miranda
Abstract: Sunlight and shadow play critical roles in how urban spaces are utilized, thrive, and grow. While access to sunlight is essential to the success of urban environments, shadows can provide shaded places to stay during the hot seasons, mitigate the heat island effect, and increase pedestrian comfort levels. Properly quantifying sunlight access and shadows in large urban environments is key to tackling some of the important challenges facing cities today. In this paper, we propose Deep Umbra, a novel computational framework that enables the quantification of sunlight access and shadows at a global scale. Our framework is based on a conditional generative adversarial network that considers the physical form of cities to compute high-resolution spatial information of accumulated sunlight access for the different seasons of the year. We use data from seven different cities to train our model and show, through an extensive set of experiments, its low overall RMSE (below 0.1) as well as its extensibility to cities that were not part of the training set. Additionally, we contribute a set of case studies and a comprehensive dataset with sunlight access information for more than 100 cities across six continents. Deep Umbra is available at https://urbantk.org/shadows.
Submitted: 26 February, 2024; originally announced February 2024.
Comments: Accepted at IEEE Transactions on Big Data. Deep Umbra is available at https://urbantk.org/shadows
arXiv:2402.12368 (https://arxiv.org/abs/2402.12368) [cs.CL]
Title: A synthetic data approach for domain generalization of NLI models
Authors: Mohammad Javad Hosseini, Andrey Petrov, Alex Fabrikant, Annie Louis
Abstract: Natural Language Inference (NLI) remains an important benchmark task for LLMs. NLI datasets are a springboard for transfer learning to other semantic tasks, and NLI models are standard tools for identifying the faithfulness of model-generated text. There are several large scale NLI datasets today, and models have improved greatly by hill-climbing on these collections. Yet their realistic performance on out-of-distribution/domain data is less well-understood. We explore the opportunity for synthetic high-quality datasets to adapt NLI models for zero-shot use in downstream applications across new and unseen text domains. We demonstrate a new approach for generating NLI data in diverse domains and lengths, so far not covered by existing training sets. The resulting examples have meaningful premises, the hypotheses are formed in creative ways rather than simple edits to a few premise tokens, and the labels have high accuracy. We show that models trained on this data ($685$K synthetic examples) have the best generalization to completely new downstream test settings. On the TRUE benchmark, a T5-small model trained with our data improves around $7\%$ on average compared to training on the best alternative dataset. The improvements are more pronounced for smaller models, while still meaningful on a T5 XXL model. We also demonstrate gains on test sets when in-domain training data is augmented with our domain-general synthetic data.
Submitted: 28 June, 2024; v1 submitted 19 February, 2024; originally announced February 2024.

arXiv:2401.11627 (https://arxiv.org/abs/2401.11627) [cs.LG, cs.AI, cs.FL, cs.LO]
Title: Tight Verification of Probabilistic Robustness in Bayesian Neural Networks
Authors: Ben Batten, Mehran Hosseini, Alessio Lomuscio
Abstract: We introduce two algorithms for computing tight guarantees on the probabilistic robustness of Bayesian Neural Networks (BNNs). Computing robustness guarantees for BNNs is a significantly more challenging task than verifying the robustness of standard Neural Networks (NNs) because it requires searching the parameters' space for safe weights. Moreover, tight and complete approaches for the verification of standard NNs, such as those based on Mixed-Integer Linear Programming (MILP), cannot be directly used for the verification of BNNs because of the polynomial terms resulting from the consecutive multiplication of variables encoding the weights.
Our algorithms efficiently and effectively search the parameters' space for safe weights by using iterative expansion and the network's gradient, and can be used with any verification algorithm of choice for BNNs. In addition to proving that our algorithms compute tighter bounds than the SoA, we also evaluate our algorithms against the SoA on standard benchmarks, such as MNIST and CIFAR10, showing that our algorithms compute bounds up to 40% tighter than the SoA.
Submitted: 28 February, 2024; v1 submitted 21 January, 2024; originally announced January 2024.
Comments: Accepted at AISTATS 2024
MSC Class: 68T27 (Primary) 68T45; 68T07; 68T01 (Secondary). ACM Class: I.2.0; I.2.4; F.3.1; D.2.4
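A building block behind such verification (though not this paper's specific algorithm) is interval bound propagation: push an input box through each layer to obtain guaranteed output ranges. A minimal sketch for one affine layer plus ReLU, with random weights as stand-ins:

    import numpy as np

    def affine_bounds(lo, hi, W, b):
        """Interval propagation through x -> W @ x + b."""
        mid, rad = (lo + hi) / 2, (hi - lo) / 2
        mid_out = W @ mid + b
        rad_out = np.abs(W) @ rad
        return mid_out - rad_out, mid_out + rad_out

    rng = np.random.default_rng(0)
    W, b = rng.normal(size=(3, 2)), rng.normal(size=3)
    lo, hi = np.array([-0.1, -0.1]), np.array([0.1, 0.1])  # input box

    lo1, hi1 = affine_bounds(lo, hi, W, b)
    lo1, hi1 = np.maximum(lo1, 0), np.maximum(hi1, 0)      # ReLU is monotone
    print(lo1, hi1)  # guaranteed output ranges for every input in the box

For a BNN the weights themselves are random, which is why the paper must additionally search the weight space for regions where such bounds certify safety.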
arXiv:2401.01951 (https://arxiv.org/abs/2401.01951) [cs.CV, cs.AI, cs.LG]
Title: GeoPos: A Minimal Positional Encoding for Enhanced Fine-Grained Details in Image Synthesis Using Convolutional Neural Networks
Authors: Mehran Hosseini, Peyman Hosseini
Abstract: The enduring inability of image generative models to recreate intricate geometric features, such as those present in human hands and fingers, has been an ongoing problem in image generation for nearly a decade. While strides have been made by increasing model sizes and diversifying training datasets, this issue remains prevalent across all models, from denoising diffusion models to Generative Adversarial Networks (GANs), pointing to a fundamental shortcoming in the underlying architectures. In this paper, we demonstrate how this problem can be mitigated by augmenting convolution layers' geometric capabilities: we provide them with a single input channel incorporating the relative n-dimensional Cartesian coordinate system. We show this drastically improves the quality of images generated by Diffusion Models, GANs, and Variational AutoEncoders (VAEs).
Submitted: 5 December, 2024; v1 submitted 3 January, 2024; originally announced January 2024.
Comments: Accepted at WACV 2025. Contains 19 pages, 15 figures, and 9 tables
MSC Class: 51. ACM Class: I.2.10; I.4.0; I.4.10
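Mechanically, "a single input channel incorporating relative Cartesian coordinates" means concatenating one position-encoding map to the image before convolution. How GeoPos combines the coordinates is the paper's contribution; the map below (a normalised x+y blend) is only a CoordConv-style stand-in to show the plumbing:

    import numpy as np

    def add_coord_channel(img):
        """Append one channel holding a normalised coordinate map.

        The (x + y) / 2 blend is a hypothetical single-channel encoding,
        not the paper's actual construction."""
        h, w, _ = img.shape
        ys = np.linspace(-1, 1, h)[:, None]
        xs = np.linspace(-1, 1, w)[None, :]
        coord = (xs + ys) / 2                    # shape (h, w), values in [-1, 1]
        return np.concatenate([img, coord[..., None]], axis=-1)

    img = np.zeros((4, 4, 3))
    print(add_coord_channel(img).shape)  # (4, 4, 4): RGB plus one coord channel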
arXiv:2311.03606 (https://arxiv.org/abs/2311.03606) [cs.CV, cs.AI]
Title: Multimodal Stress Detection Using Facial Landmarks and Biometric Signals
Authors: Majid Hosseini, Morteza Bodaghi, Ravi Teja Bhupatiraju, Anthony Maida, Raju Gottumukkala
Abstract: The development of various sensing technologies is improving measurements of stress and the well-being of individuals. Although progress has been made with single signal modalities like wearables and facial emotion recognition, integrating multiple modalities provides a more comprehensive understanding of stress, given that stress manifests differently across different people. Multi-modal learning aims to capitalize on the strength of each modality rather than relying on a single signal. Given the complexity of processing and integrating high-dimensional data from limited subjects, more research is needed. Numerous research efforts have focused on fusing stress and emotion signals at an early stage, e.g., feature-level fusion using basic machine learning methods and 1D-CNN methods. This paper proposes a multi-modal learning approach for stress detection that integrates facial landmarks and biometric signals. We test this multi-modal integration with various early-fusion and late-fusion techniques, combining a 1D-CNN model for biometric signals with a 2D-CNN for facial landmarks. We evaluate these architectures using a rigorous test of the models' generalizability via the leave-one-subject-out mechanism, i.e., all samples related to a single subject are left out when training the model. Our findings show that late-fusion achieved 94.39\% accuracy, and early-fusion surpassed it with a 98.38\% accuracy rate. This research contributes valuable insights into enhancing stress detection through a multi-modal approach.
Submitted: 6 November, 2023; originally announced November 2023.
Comments: 16 pages, 8 figures
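The leave-one-subject-out protocol mentioned above is simple to state in code: every subject in turn becomes the test fold, and the model is trained on everyone else. A sketch with a hypothetical subject-to-samples mapping and stub training:

    # Leave-one-subject-out cross-validation: all samples from one subject
    # form the test fold; the model never sees that subject in training.
    data = {                        # hypothetical subject -> samples mapping
        "s1": [([0.1, 0.2], 0), ([0.3, 0.1], 1)],
        "s2": [([0.2, 0.2], 0)],
        "s3": [([0.4, 0.5], 1)],
    }

    for held_out in data:
        train = [ex for s, exs in data.items() if s != held_out for ex in exs]
        test = data[held_out]
        # train_model(train) and evaluate(test) would go here (stubs omitted).
        print(f"fold {held_out}: {len(train)} train / {len(test)} test samples")

This is stricter than random splits because it measures generalization to unseen people, which matters when stress manifests differently across subjects.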
arXiv:2309.05150 [pdf, other] | cs.CV, cs.AI, cs.LG
Faster, Lighter, More Accurate: A Deep Learning Ensemble for Content Moderation
Authors: Mohammad Hosseini, Mahmudul Hasan
Abstract: To address the increasing need for efficient and accurate content moderation, we propose an efficient and lightweight deep classification ensemble structure. Our approach is based on a combination of simple visual features, designed for high-accuracy classification of violent content with low false positives. Our ensemble architecture utilizes a set of lightweight models with narrowed-down color features, and we apply it to both images and videos. We evaluated our approach using a large dataset of explosion and blast content and compared its performance to popular deep learning models such as ResNet-50. Our evaluation results demonstrate significant improvements in prediction accuracy, while benefiting from 7.64x faster inference and lower computation cost. While our approach is tailored to explosion detection, it can be applied to other similar content moderation and violence detection use cases as well. Based on our experiments, we propose a "think small, think many" philosophy in classification scenarios. We argue that transforming a single, large, monolithic deep model into a verification-based step ensemble of multiple small, simple, and lightweight models with narrowed-down visual features can possibly lead to predictions with higher accuracy.
Submitted 10 September, 2023; originally announced September 2023.
Comments: 6 pages, 22nd IEEE International Conference on Machine Learning and Applications (IEEE ICMLA'23), December 15-17, 2023, Jacksonville Riverfront, Florida, USA. arXiv admin note: substantial text overlap with arXiv:2103.10350
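The closing "think small, think many" claim lends itself to a short sketch: a verification-based cascade in which each small model must confirm the previous one's positive call, so benign content exits at the first rejecting stage. The two color-feature stages and the thresholds below are illustrative assumptions, not the paper's exact ensemble.

```python
# Hypothetical verification cascade: flag content only if every lightweight
# stage verifies it; negatives exit early, keeping average compute low.
from typing import Callable, Sequence
import numpy as np

Stage = Callable[[np.ndarray], float]  # returns a score in [0, 1]

def cascade_predict(x: np.ndarray, stages: Sequence[Stage],
                    thresholds: Sequence[float]) -> bool:
    for stage, thr in zip(stages, thresholds):
        if stage(x) < thr:
            return False  # early exit: this verifier rejected the sample
    return True  # all small models agreed

# Toy stages keyed to narrowed-down color features (illustrative only):
fire_colors = lambda x: float(x[0])  # e.g., share of flame-colored pixels
smoke_gray = lambda x: float(x[1])   # e.g., share of smoke-gray pixels
print(cascade_predict(np.array([0.9, 0.8]), [fire_colors, smoke_gray], [0.5, 0.5]))
```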
style="display: inline;"> While cities around the world are looking for smart ways to use new advances in data collection, management, and analysis to address their problems, the complex nature of urban issues and the overwhelming amount of available data have posed significant challenges in translating these efforts into actionable insights. In the past few years, urban visual analytics tools have significantly helped tac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07769v1-abstract-full').style.display = 'inline'; document.getElementById('2308.07769v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07769v1-abstract-full" style="display: none;"> While cities around the world are looking for smart ways to use new advances in data collection, management, and analysis to address their problems, the complex nature of urban issues and the overwhelming amount of available data have posed significant challenges in translating these efforts into actionable insights. In the past few years, urban visual analytics tools have significantly helped tackle these challenges. When analyzing a feature of interest, an urban expert must transform, integrate, and visualize different thematic (e.g., sunlight access, demographic) and physical (e.g., buildings, street networks) data layers, oftentimes across multiple spatial and temporal scales. However, integrating and analyzing these layers require expertise in different fields, increasing development time and effort. This makes the entire visual data exploration and system implementation difficult for programmers and also sets a high entry barrier for urban experts outside of computer science. With this in mind, in this paper, we present the Urban Toolkit (UTK), a flexible and extensible visualization framework that enables the easy authoring of web-based visualizations through a new high-level grammar specifically built with common urban use cases in mind. In order to facilitate the integration and visualization of different urban data, we also propose the concept of knots to merge thematic and physical urban layers. We evaluate our approach through use cases and a series of interviews with experts and practitioners from different domains, including urban accessibility, urban planning, architecture, and climate science. UTK is available at urbantk.org. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07769v1-abstract-full').style.display = 'none'; document.getElementById('2308.07769v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IEEE VIS 2023. 
arXiv:2308.03936 [pdf, other] | cs.CV, cs.AI
ALFA -- Leveraging All Levels of Feature Abstraction for Enhancing the Generalization of Histopathology Image Classification Across Unseen Hospitals
Authors: Milad Sikaroudi, Maryam Hosseini, Shahryar Rahnamayan, H. R. Tizhoosh
Abstract: We propose an exhaustive methodology that leverages all levels of feature abstraction, targeting an enhancement in the generalizability of image classification to unobserved hospitals. Our approach incorporates augmentation-based self-supervision, with common distribution shifts in histopathology scenarios serving as the pretext task. This enables us to derive invariant features from training images without relying on training labels, thereby covering different abstraction levels. Moving on to the subsequent abstraction level, we employ a domain alignment module to facilitate further extraction of invariant features across varying training hospitals. To represent the highly specific features of participating hospitals, an encoder is trained to classify hospital labels, independent of their diagnostic labels. The features from each of these encoders are subsequently disentangled to minimize redundancy and segregate the features. This representation, which spans a broad spectrum of semantic information, enables the development of a model demonstrating increased robustness to unseen images from disparate distributions.
Experimental results from the PACS dataset (a domain generalization benchmark), a synthetic dataset created by applying histopathology-specific jitters to the MHIST dataset (defining different domains with varied distribution shifts), and a Renal Cell Carcinoma dataset derived from four image repositories from TCGA collectively indicate that our proposed model is adept at managing varying levels of image granularity. Thus, it shows improved generalizability when faced with new, out-of-distribution hospital images.
Submitted 9 August, 2023; v1 submitted 7 August, 2023; originally announced August 2023.
Comments: Accepted for publication at ICCV 2023, Computer Vision for Automated Medical Diagnosis Workshop
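One ingredient of the pipeline above translates naturally into code: the disentanglement step that keeps the domain-invariant features and the hospital-specific features from encoding the same information. The penalty below, a squared cross-correlation between the two feature sets, is one common way to express "minimize redundancy"; it is an assumption standing in for the paper's exact formulation.

```python
# Sketch of a redundancy penalty between two encoder outputs (PyTorch).
import torch

def redundancy_penalty(z_inv: torch.Tensor, z_dom: torch.Tensor) -> torch.Tensor:
    """z_inv, z_dom: (batch, dim) features from the invariant and
    hospital-specific encoders; pushes their cross-correlations to zero."""
    z_inv = (z_inv - z_inv.mean(0)) / (z_inv.std(0) + 1e-6)
    z_dom = (z_dom - z_dom.mean(0)) / (z_dom.std(0) + 1e-6)
    c = (z_inv.T @ z_dom) / z_inv.shape[0]  # (dim, dim) cross-correlation
    return (c ** 2).mean()

# Assumed usage: total_loss = task_loss + lam * redundancy_penalty(z_inv, z_dom)
print(redundancy_penalty(torch.randn(32, 8), torch.randn(32, 8)))
```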
arXiv:2307.03967 [pdf, other] | cs.CV
End-to-End Supervised Multilabel Contrastive Learning
Authors: Ahmad Sajedi, Samir Khaki, Konstantinos N. Plataniotis, Mahdi S. Hosseini
Abstract: Multilabel representation learning is recognized as a challenging problem that can be associated with either label dependencies between object categories or data-related issues such as the inherent imbalance of positive/negative samples. Recent advances address these challenges from model- and data-centric viewpoints. In model-centric approaches, label correlation is obtained by external model designs (e.g., graph CNN) to incorporate an inductive bias for training. However, these approaches fail to provide an end-to-end training framework, leading to high computational complexity. In contrast, data-centric approaches consider the realistic nature of the dataset to improve classification while ignoring the label dependencies. In this paper, we propose a new end-to-end training framework -- dubbed KMCL (Kernel-based Multilabel Contrastive Learning) -- to address the shortcomings of both model- and data-centric designs. KMCL first transforms the embedded features into a mixture of exponential kernels in Gaussian RKHS. It then encodes an objective loss comprised of (a) a reconstruction loss to reconstruct the kernel representation, (b) an asymmetric classification loss to address the inherent imbalance problem, and (c) a contrastive loss to capture label correlation. KMCL models the uncertainty of the feature encoder while maintaining a low computational footprint. Extensive experiments are conducted on image classification tasks to showcase the consistent improvements of KMCL over the SOTA methods. A PyTorch implementation is provided at https://github.com/mahdihosseini/KMCL.
Submitted 8 July, 2023; originally announced July 2023.
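The three-part objective enumerated in the abstract can be sketched compactly. The weights, the asymmetric-focusing exponent, and the temperature below are assumptions for illustration; the authoritative forms live in the paper and the linked repository.

```python
# Sketch of a KMCL-style composite loss (PyTorch); all hyperparameters
# and the exact term shapes are assumptions, not the paper's definitions.
import torch

def kmcl_style_loss(recon, target, logits, labels, z, pos_mask,
                    w=(1.0, 1.0, 0.5), gamma_neg=4.0, tau=0.1):
    # (a) reconstruction of the kernel representation
    l_rec = torch.nn.functional.mse_loss(recon, target)
    # (b) asymmetric BCE: down-weight easy negatives to fight label imbalance
    p = torch.sigmoid(logits)
    l_asl = -(labels * torch.log(p + 1e-8)
              + (1 - labels) * (p ** gamma_neg) * torch.log(1 - p + 1e-8)).mean()
    # (c) supervised contrastive term over label-sharing pairs;
    # pos_mask[i, j] = 1 when samples i and j share a label (diagonal excluded)
    sim = (z @ z.T) / tau
    logp = sim - torch.logsumexp(sim, dim=1, keepdim=True)
    l_con = -(logp * pos_mask).sum() / pos_mask.sum().clamp(min=1)
    return w[0] * l_rec + w[1] * l_asl + w[2] * l_con
```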
arXiv:2305.19585 [pdf, other] | cs.CL, cs.LG
LAIT: Efficient Multi-Segment Encoding in Transformers with Layer-Adjustable Interaction
Authors: Jeremiah Milbauer, Annie Louis, Mohammad Javad Hosseini, Alex Fabrikant, Donald Metzler, Tal Schuster
Abstract: Transformer encoders contextualize token representations by attending to all other tokens at each layer, leading to a quadratic increase in compute effort with the input length. In practice, however, the input text of many NLP tasks can be seen as a sequence of related segments (e.g., the sequence of sentences within a passage, or the hypothesis and premise in NLI). While attending across these segments is highly beneficial for many tasks, we hypothesize that this interaction can be delayed until later encoding stages. To this end, we introduce Layer-Adjustable Interactions in Transformers (LAIT). Within LAIT, segmented inputs are first encoded independently, and then jointly. This partial two-tower architecture bridges the gap between a Dual Encoder's ability to pre-compute representations for segments and a fully self-attentive Transformer's capacity to model cross-segment attention. The LAIT framework effectively leverages existing pretrained Transformers and converts them into the hybrid of the two aforementioned architectures, allowing for easy and intuitive control over the performance-efficiency tradeoff. Experimenting on a wide range of NLP tasks, we find LAIT able to reduce 30-50% of the attention FLOPs on many tasks, while preserving high accuracy; in some practical settings, LAIT could reduce actual latency by orders of magnitude.
Submitted 31 May, 2023; originally announced May 2023.
Comments: ACL 2023
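The core mechanism, independent encoding of segments for the first k layers followed by joint encoding, fits in a few lines of PyTorch. The sketch below uses stock TransformerEncoderLayer modules as stand-ins and shows only the control flow, not the paper's implementation.

```python
# Sketch of a partial two-tower encoder in the spirit of LAIT.
import torch
import torch.nn as nn

class LAITStyleEncoder(nn.Module):
    def __init__(self, dim=64, n_layers=6, k_independent=4, n_heads=4):
        super().__init__()
        layer = lambda: nn.TransformerEncoderLayer(dim, n_heads, batch_first=True)
        self.independent = nn.ModuleList(layer() for _ in range(k_independent))
        self.joint = nn.ModuleList(layer() for _ in range(n_layers - k_independent))

    def forward(self, segments):  # list of (batch, seg_len, dim) tensors
        # Phase 1: no cross-segment attention; these passes are cacheable,
        # like a dual encoder's precomputed segment representations.
        encoded = []
        for seg in segments:
            for lyr in self.independent:
                seg = lyr(seg)
            encoded.append(seg)
        # Phase 2: full self-attention across the concatenated segments.
        x = torch.cat(encoded, dim=1)
        for lyr in self.joint:
            x = lyr(x)
        return x

enc = LAITStyleEncoder()
out = enc([torch.randn(2, 10, 64), torch.randn(2, 7, 64)])
print(out.shape)  # torch.Size([2, 17, 64])
```

Moving the boundary k_independent toward 0 recovers a fully self-attentive encoder; moving it toward n_layers recovers a dual encoder, which is the adjustable tradeoff the title refers to.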
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14552">arXiv:2305.14552</a> <span> [<a href="https://arxiv.org/pdf/2305.14552">pdf</a>, <a href="https://arxiv.org/format/2305.14552">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Sources of Hallucination by Large Language Models on Inference Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=McKenna%2C+N">Nick McKenna</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tianyi Li</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+L">Liang Cheng</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+J">Mohammad Javad Hosseini</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Mark Johnson</a>, <a href="/search/cs?searchtype=author&query=Steedman%2C+M">Mark Steedman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14552v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) are claimed to be capable of Natural Language Inference (NLI), necessary for applied tasks like question answering and summarization. We present a series of behavioral studies on several LLM families (LLaMA, GPT-3.5, and PaLM) which probe their behavior using controlled experiments. We establish two biases originating from pretraining which predict much of their behavi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14552v2-abstract-full').style.display = 'inline'; document.getElementById('2305.14552v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14552v2-abstract-full" style="display: none;"> Large Language Models (LLMs) are claimed to be capable of Natural Language Inference (NLI), necessary for applied tasks like question answering and summarization. We present a series of behavioral studies on several LLM families (LLaMA, GPT-3.5, and PaLM) which probe their behavior using controlled experiments. We establish two biases originating from pretraining which predict much of their behavior, and show that these are major sources of hallucination in generative LLMs. First, memorization at the level of sentences: we show that, regardless of the premise, models falsely label NLI test samples as entailing when the hypothesis is attested in training data, and that entities are used as ``indices'' to access the memorized data. Second, statistical patterns of usage learned at the level of corpora: we further show a similar effect when the premise predicate is less frequent than that of the hypothesis in the training data, a bias following from previous studies. We demonstrate that LLMs perform significantly worse on NLI test samples which do not conform to these biases than those which do, and we offer these as valuable controls for future LLM evaluation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14552v2-abstract-full').style.display = 'none'; document.getElementById('2305.14552v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings of EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.06907">arXiv:2304.06907</a> <span> [<a href="https://arxiv.org/pdf/2304.06907">pdf</a>, <a href="https://arxiv.org/format/2304.06907">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s11554-022-01210-6">10.1007/s11554-022-01210-6 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Toward Real-Time Image Annotation Using Marginalized Coupled Dictionary Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Roostaiyan%2C+S+M">Seyed Mahdi Roostaiyan</a>, <a href="/search/cs?searchtype=author&query=Hosseini%2C+M+M">Mohammad Mehdi Hosseini</a>, <a href="/search/cs?searchtype=author&query=Kashani%2C+M+M">Mahya Mohammadi Kashani</a>, <a href="/search/cs?searchtype=author&query=Amiri%2C+S+H">S. Hamid Amiri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.06907v2-abstract-short" style="display: inline;"> In most image retrieval systems, images include various high-level semantics, called tags or annotations. Virtually all the state-of-the-art image annotation methods that handle imbalanced labeling are search-based techniques which are time-consuming. In this paper, a novel coupled dictionary learning approach is proposed to learn a limited number of visual prototypes and their corresponding seman… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06907v2-abstract-full').style.display = 'inline'; document.getElementById('2304.06907v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.06907v2-abstract-full" style="display: none;"> In most image retrieval systems, images include various high-level semantics, called tags or annotations. Virtually all the state-of-the-art image annotation methods that handle imbalanced labeling are search-based techniques which are time-consuming. 
In this paper, a novel coupled dictionary learning approach is proposed to learn a limited number of visual prototypes and their corresponding semantics simultaneously. This approach leads to a real-time image annotation procedure. Another contribution of this paper is that it utilizes a marginalized loss function instead of the squared loss function, which is inappropriate for image annotation with imbalanced labels. The marginalized loss function yields a simple and effective method of prototype updating. Meanwhile, we introduce ${\ell}_1$ regularization on semantic prototypes to preserve the sparse and imbalanced nature of labels in the learned semantic prototypes. Finally, comprehensive experimental results on various datasets demonstrate the efficiency of the proposed method for image annotation tasks in terms of accuracy and time. The reference implementation is publicly available at https://github.com/hamid-amiri/MCDL-Image-Annotation.
Submitted 17 April, 2023; v1 submitted 13 April, 2023; originally announced April 2023.
Journal ref: Journal of Real-Time Image Processing, 19(3):623-638, 2022 (Springer)
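At inference time, annotation with coupled dictionaries reduces to a single sparse-coding step, which is what makes the procedure real-time. The sketch below assumes random stand-in dictionaries D (visual) and S (semantic) and an off-the-shelf lasso solver for the sparse code; the learning procedure and the marginalized loss are the paper's contribution and are not reproduced here.

```python
# Sketch of coupled-dictionary annotation: sparse-code a visual feature over
# D, then read tag scores off the coupled semantic prototypes S.
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
d_vis, n_atoms, n_tags = 64, 32, 10
D = rng.normal(size=(d_vis, n_atoms))   # visual prototypes (columns)
S = rng.random(size=(n_tags, n_atoms))  # coupled semantic prototypes

def annotate(x: np.ndarray, top_k: int = 3) -> np.ndarray:
    """Return indices of the top_k tags for a visual feature x."""
    coder = Lasso(alpha=0.1, fit_intercept=False)
    coder.fit(D, x)          # solve min ||x - D a||^2 + alpha * ||a||_1
    a = coder.coef_          # sparse code shared by both dictionaries
    scores = S @ a           # transfer the code to tag space
    return np.argsort(scores)[-top_k:][::-1]

print(annotate(rng.normal(size=d_vis)))
```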
arXiv:2304.05482 [pdf, other] | eess.IV, cs.CV
Computational Pathology: A Survey Review and The Way Forward
Authors: Mahdi S. Hosseini, Babak Ehteshami Bejnordi, Vincent Quoc-Huy Trinh, Danial Hasan, Xingwen Li, Taehyo Kim, Haochen Zhang, Theodore Wu, Kajanan Chinniah, Sina Maghsoudlou, Ryan Zhang, Stephen Yang, Jiadai Zhu, Lyndon Chan, Samir Khaki, Andrei Buin, Fatemeh Chaji, Ala Salehi, Bich Ngoc Nguyen, Dimitris Samaras, Konstantinos N. Plataniotis
Abstract: Computational Pathology (CPath) is an interdisciplinary science that advances the development of computational approaches to analyze and model medical histopathology images. The main objective of CPath is to develop the infrastructure and workflows of digital diagnostics as an assistive CAD system for clinical pathology, facilitating transformational changes in the diagnosis and treatment of cancer that are mainly addressed by CPath tools. With ever-growing developments in deep learning and computer vision algorithms, and the ease of data flow from digital pathology, CPath is currently witnessing a paradigm shift. Despite the sheer volume of engineering and scientific works being introduced for cancer image analysis, there is still a considerable gap in adopting and integrating these algorithms into clinical practice. This raises a significant question regarding the direction and trends being undertaken in CPath.
In this article we provide a comprehensive review of more than 800 papers to address the challenges faced from problem design all the way to application and implementation viewpoints. We have catalogued each paper into a model-card by examining the key works and challenges faced, to lay out the current landscape in CPath. We hope this helps the community locate relevant works and facilitates understanding of the field's future directions. In a nutshell, we view CPath development as a cycle of stages that must be cohesively linked together to address the challenges associated with such a multidisciplinary science. We overview this cycle from the different perspectives of data-centric, model-centric, and application-centric problems. We finally sketch the remaining challenges and provide directions for future technical developments and clinical integration of CPath (https://github.com/AtlasAnalyticsLab/CPath_Survey).
Submitted 27 January, 2024; v1 submitted 11 April, 2023; originally announced April 2023.
Comments: Accepted in Elsevier Journal of Pathology Informatics (JPI) 2024

arXiv:2303.02468 [pdf, other] | cs.CL, cs.LG | doi: 10.18653/v1/2023.semeval-1.185
Lon-ea at SemEval-2023 Task 11: A Comparison of Activation Functions for Soft and Hard Label Prediction
Authors: Peyman Hosseini, Mehran Hosseini, Sana Sabah Al-Azzawi, Marcus Liwicki, Ignacio Castro, Matthew Purver
Abstract: We study the influence of
different activation functions in the output layer of deep neural network models for soft and hard label prediction in the learning with disagreement task. In this task, the goal is to quantify the amount of disagreement via predicting soft labels. To predict the soft labels, we use BERT-based preprocessors and encoders and vary the activation function used in the output layer, while keeping other parameters constant. The soft labels are then used for the hard label prediction. The activation functions considered are sigmoid, a step function that is added to the model post-training, and a sinusoidal activation function, which is introduced for the first time in this paper.
Submitted 3 January, 2024; v1 submitted 4 March, 2023; originally announced March 2023.
Comments: Accepted in ACL 2023 SemEval Workshop as selected task paper
ACM Class: I.2.7
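The three output activations under comparison are easy to state side by side. The sinusoidal form below is an illustrative guess at a [0, 1]-valued periodic activation; the paper defines the exact function it introduces.

```python
# Side-by-side sketch of the compared output activations.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def step(z, thr=0.0):
    # added post-training: collapses logits to hard {0, 1} labels
    return (z > thr).astype(float)

def sinusoidal(z):
    # assumed form: squashes logits into [0, 1] with a periodic response
    return 0.5 * (np.sin(z) + 1.0)

z = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
for f in (sigmoid, step, sinusoidal):
    print(f.__name__, np.round(f(z), 3))
```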
arXiv:2302.02956 [pdf, other] | cs.RO
RoboCup 2022 AdultSize Winner NimbRo: Upgraded Perception, Capture Steps Gait and Phase-based In-walk Kicks
Authors: Dmytro Pavlichenko, Grzegorz Ficht, Arash Amini, Mojtaba Hosseini, Raphael Memmesheimer, Angel Villar-Corrales, Stefan M. Schulz, Marcell Missura, Maren Bennewitz, Sven Behnke
Abstract: Beating the human world champions by 2050 is an ambitious goal of the Humanoid League that provides a strong incentive for RoboCup teams to further improve and develop their systems. In this paper, we present upgrades of our system which enabled our team NimbRo to win the Soccer Tournament, the Drop-in Games, and the Technical Challenges in the Humanoid AdultSize League of RoboCup 2022. Strong performance in these competitions resulted in the Best Humanoid award in the Humanoid League. The mentioned upgrades include: a hardware upgrade of the vision module, balanced walking with Capture Steps, and the introduction of phase-based in-walk kicks.
Submitted 7 February, 2023; v1 submitted 6 February, 2023; originally announced February 2023.
Journal ref: In: RoboCup 2022: Robot World Cup XXV, LNCS 13561, Springer, May 2023