<!-- CINXE.COM

Speech Processing -->

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">

  <!-- Canonical URL and Open Graph metadata for this research-area page. -->
  <link rel="canonical" href="https://research.google/research-areas/speech-processing/">
  <meta property="og:title" content="Speech Processing">
  <meta property="og:url" content="https://research.google/research-areas/speech-processing/">
  <meta property="og:image" content="https://storage.googleapis.com/gweb-research2023-media/images/Open_Graph.width-800.format-jpeg.jpg">
  <meta property="og:image:secure_url" content="https://storage.googleapis.com/gweb-research2023-media/images/Open_Graph.width-800.format-jpeg.jpg">
  <!-- Open Graph object types are lowercase tokens. -->
  <meta property="og:type" content="website">

  <title>Speech Processing</title>

  <!-- Viewport properties are a comma-separated list. -->
  <meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">

  <!-- No type hint here: the previous image/png value contradicted the .ico extension. -->
  <link rel="icon" href="/gr/static/assets/favicon.ico">

  <!-- Web fonts. The preload URL must stay identical to the stylesheet URL below it. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link rel="preload" href="https://fonts.googleapis.com/css2?family=Product+Sans&amp;family=Google+Sans+Display:ital@0;1&amp;family=Google+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Google+Sans+Text:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;display=swap" as="style">
  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Product+Sans&amp;family=Google+Sans+Display:ital@0;1&amp;family=Google+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Google+Sans+Text:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;display=swap">
  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Roboto+Mono:wght@400;700&amp;display=swap">

  <!-- Shared component styles, then the site stylesheet. -->
  <link rel="stylesheet" href="https://www.gstatic.com/glue/cookienotificationbar/cookienotificationbar.min.css">
  <link rel="stylesheet" href="https://www.gstatic.com/glue/v27_1/glue-material.min.css">
  <link rel="stylesheet" href="/gr/static/css/googleresearch.css?id=0c26ea1fed8bdd0324f9f4fad1f6a470">

  <!-- Google Tag Manager -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-K8QBZ7Q'); </script>
<!-- End Google Tag Manager -->
</head>
<body class="js-google-tag-wrapper" data-gt-page-path="https://research.google/research-areas/speech-processing/" data-env="production">
  <!-- Google Tag Manager (noscript) -->
  <!-- The fallback iframe carries a title so it is identifiable if exposed to assistive technology. -->
  <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-K8QBZ7Q" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
  <!-- End Google Tag Manager (noscript) -->

  <header class="global-header glue-header glue-header--single not-glue">
    <!-- Skip link targets the main element's id (page-content). -->
    <a href="#page-content" class="glue-header__skip-content">Jump to Content</a>
    <div class="glue-header__bar glue-header__bar--mobile not-glue">
      <div class="glue-header__tier not-glue">
        <!-- mobile lockup component -->
        <div class="glue-header__container">
          <div class="glue-header__lock-up">
            <!-- Hamburger button component -->
            <div class="glue-header__hamburger">
              <button class="glue-header__drawer-toggle-btn" aria-label="Open the navigation drawer">
                <svg class="glue-icon glue-icon--24px" role="presentation" aria-hidden="true">
                  <use href="/gr/static/assets/icons/glue-icons.svg#menu"></use>
                </svg>
              </button>
            </div>
            <div class="glue-header__logo">
              <!-- The logo graphic is hidden from assistive technology, so the link is named
                   explicitly; without aria-label its name would be only "Research". -->
              <a class="glue-header__logo-link" href="/" title="Google Research" aria-label="Google Research">
                <!-- Logo component -->
                <div class="glue-header__logo-container">
                  <svg role="presentation" aria-hidden="true" class="glue-icon glue-header__logo-svg">
                    <use href="/gr/static/assets/icons/glue-icons.svg#google-color-logo"></use>
                  </svg>
                </div>
                <span class="glue-header__logo--product">Research</span>
              </a>
            </div>
          </div>
        </div>
      </div>
    </div>
    <div class="glue-header__bar glue-header__bar--desktop 
glue-header__drawer">
  <div class="glue-header__tier">
    <!-- desktop lockup component -->
    <div class="glue-header__container">
      <div class="glue-header__lock-up">
        <div class="glue-header__logo">
          <!-- Both logo graphics are hidden from assistive technology, so the link is named
               explicitly; without aria-label its name would be only "Research". -->
          <a class="glue-header__logo-link" href="/" title="Google Research" aria-label="Google Research">
            <!-- Logo component: dark and light variants of the same mark. -->
            <div class="glue-header__logo-container">
              <svg role="presentation" aria-hidden="true" class="glue-icon glue-header__logo-svg not-glue --dark-logo">
                <use href="/gr/static/assets/icons/glue-icons.svg#google-solid-logo"></use>
              </svg>
              <svg role="presentation" aria-hidden="true" class="glue-icon glue-header__logo-svg --light-logo">
                <use href="/gr/static/assets/icons/glue-icons.svg#google-color-logo"></use>
              </svg>
            </div>
            <span class="glue-header__logo--product">Research</span>
          </a>
        </div>
      </div>
    </div>
    <!-- linkbar component -->
    <div class="glue-header__container glue-header__container--linkbar">
      <!-- Labelled so it can be told apart from the breadcrumb nav landmark. -->
      <nav class="glue-header__link-bar navigation js-gt-global-nav-wrapper" aria-label="Main">
        <ul class="glue-header__list">
          <li class="glue-header__item js-sub-nav-parent --parent" data-gt-primary="Who we are">
            <button class="glue-header__link js-sub-nav-target" aria-haspopup="true" aria-expanded="false">
              <span class=""> Who we are <span class="icon icon--caret"></span> </span>
            </button>
            <!-- NOTE(review): this panel is role="menu" but its content is headings, text and
                 plain links rather than menuitem children. Confirm with the drawer script
                 before changing the role. -->
            <div class="navigation__sub js-sub-nav" role="menu">
              <div class="navigation__sub__container">
                <div class="navigation__sub__mobile-heading">
                  <button class="glue-header__link js-sub-nav-close-mobile">
                    <span class="sr-text">Back to</span>
                    <span class="icon icon--caret"></span>
                    Who we are
                    <span class="sr-text">menu</span>
                  </button>
                  <hr>
                </div>
                <div class="block-nav_drawer_columns_content">
                  <div class="navigation__sub--content" data-gt-secondary="Defining the technology of today and tomorrow.">
                    <div class="navigation__sub__wrapper">
                      <div class="navigation__sub__heading">
                        <h2 class="headline-3">Defining the technology of today and tomorrow.</h2>
                      </div>
                      <ul class="navigation__sub__columns">
                        <li 
data-gt-secondary="Philosophy"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> Philosophy </h2> <p class="navigation__sub__columns__description caption">We strive to create an environment conducive to many different types of research across many different time scales and levels of risk.</p> <a href="https://research.google/philosophy/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Philosophy</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/philosophy/" > Philosophy </a> </div> </li> <li data-gt-secondary="People"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> People </h2> <p class="navigation__sub__columns__description caption">Our researchers drive advancements in computer science through both fundamental and applied research.</p> <a href="https://research.google/people/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our People</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/people/" > People </a> </div> </li> </ul> </div> </div> </div> </div> </div> </li> <li class="glue-header__item js-sub-nav-parent --parent" data-gt-primary="Research areas" > <button class="glue-header__link js-sub-nav-target" aria-haspopup="true" aria-expanded="false" > <span class=""> Research areas <span class="icon icon--caret"></span> </span> </button> <div class="navigation__sub js-sub-nav" role="menu"> <div class="navigation__sub__container"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Research areas <span 
class="sr-text">menu</span> </button> <hr/> </div> <div class="block-nav_drawer_columns_link_list"> <div class="navigation__sub--list"> <div class="navigation__sub__wrapper"> <ul class="navigation__sub__columns"> <li data-gt-secondary="Research areas"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading">Research areas</h2> <ul> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/" > Explore all research areas </a> </li> </ul> </div> <div class="navigation__sub__columns__mobile"> <button class="glue-header__link js-sub-nav-target" data-panel="nested" role="menuitem" aria-haspopup="true"> Research areas <span class="icon icon--caret"></span> </button> <div class="navigation__nested-sub js-sub-nav-parent"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile" role="menuitem" aria-haspopup="true"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Research areas <span class="sr-text">menu</span> </button> <hr/> </div> <ul> <li role="menuitem"> <a href="https://research.google/research-areas/" class="navigation__sub__columns__mobile__link" > Explore all research areas <span> </span> </a> </li> </ul> </div> </div> </li> <li data-gt-secondary="Foundational ML &amp; Algorithms"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading">Foundational ML &amp; Algorithms</h2> <ul> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/algorithms-and-theory/" > Algorithms &amp; Theory </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/data-management/" > Data Management </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" 
href="https://research.google/research-areas/data-mining-and-modeling/" > Data Mining &amp; Modeling </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/information-retrieval-and-the-web/" > Information Retrieval &amp; the Web </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/machine-intelligence/" > Machine Intelligence </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/machine-perception/" > Machine Perception </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/machine-translation/" > Machine Translation </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/natural-language-processing/" > Natural Language Processing </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/speech-processing/" > Speech Processing </a> </li> </ul> </div> <div class="navigation__sub__columns__mobile"> <button class="glue-header__link js-sub-nav-target" data-panel="nested" role="menuitem" aria-haspopup="true"> Foundational ML &amp; Algorithms <span class="icon icon--caret"></span> </button> <div class="navigation__nested-sub js-sub-nav-parent"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile" role="menuitem" aria-haspopup="true"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Foundational ML &amp; Algorithms <span class="sr-text">menu</span> </button> <hr/> </div> <ul> <li role="menuitem"> <a href="https://research.google/research-areas/algorithms-and-theory/" class="navigation__sub__columns__mobile__link" > Algorithms &amp; Theory <span> 
</span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/data-management/" class="navigation__sub__columns__mobile__link" > Data Management <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/data-mining-and-modeling/" class="navigation__sub__columns__mobile__link" > Data Mining &amp; Modeling <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/information-retrieval-and-the-web/" class="navigation__sub__columns__mobile__link" > Information Retrieval &amp; the Web <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/machine-intelligence/" class="navigation__sub__columns__mobile__link" > Machine Intelligence <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/machine-perception/" class="navigation__sub__columns__mobile__link" > Machine Perception <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/machine-translation/" class="navigation__sub__columns__mobile__link" > Machine Translation <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/natural-language-processing/" class="navigation__sub__columns__mobile__link" > Natural Language Processing <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/speech-processing/" class="navigation__sub__columns__mobile__link" > Speech Processing <span> </span> </a> </li> </ul> </div> </div> </li> <li data-gt-secondary="Computing Systems &amp; Quantum AI"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading">Computing Systems &amp; Quantum AI</h2> <ul> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/distributed-systems-and-parallel-computing/" > Distributed Systems &amp; Parallel
Computing </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/hardware-and-architecture/" > Hardware &amp; Architecture </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/mobile-systems/" > Mobile Systems </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/networking/" > Networking </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/quantum-computing/" > Quantum Computing </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/robotics/" > Robotics </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/security-privacy-and-abuse-prevention/" > Security, Privacy, &amp; Abuse
Prevention </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/software-engineering/" > Software Engineering </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/software-systems/" > Software Systems </a> </li> </ul> </div> <div class="navigation__sub__columns__mobile"> <button class="glue-header__link js-sub-nav-target" data-panel="nested" role="menuitem" aria-haspopup="true"> Computing Systems &amp; Quantum AI <span class="icon icon--caret"></span> </button> <div class="navigation__nested-sub js-sub-nav-parent"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile" role="menuitem" aria-haspopup="true"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Computing Systems &amp; Quantum AI <span class="sr-text">menu</span> </button> <hr/> </div> <ul> <li role="menuitem"> <a href="https://research.google/research-areas/distributed-systems-and-parallel-computing/" class="navigation__sub__columns__mobile__link" > Distributed Systems &amp; Parallel
Computing <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/hardware-and-architecture/" class="navigation__sub__columns__mobile__link" > Hardware &amp; Architecture <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/mobile-systems/" class="navigation__sub__columns__mobile__link" > Mobile Systems <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/networking/" class="navigation__sub__columns__mobile__link" > Networking <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/quantum-computing/" class="navigation__sub__columns__mobile__link" > Quantum Computing <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/robotics/" class="navigation__sub__columns__mobile__link" > Robotics <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/security-privacy-and-abuse-prevention/" class="navigation__sub__columns__mobile__link" > Security, Privacy, &amp; Abuse
Prevention <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/software-engineering/" class="navigation__sub__columns__mobile__link" > Software Engineering <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/software-systems/" class="navigation__sub__columns__mobile__link" > Software Systems <span> </span> </a> </li> </ul> </div> </div> </li> <li data-gt-secondary="Science, AI &amp; Society"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading">Science, AI &amp; Society</h2> <ul> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/climate-and-sustainability/" > Climate &amp; Sustainability </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/economics-and-electronic-commerce/" > Economics &amp; Electronic Commerce </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/education-innovation/" > Education Innovation </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/general-science/" > General Science </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/health-bioscience/" > Health &amp; Bioscience </a> </li> <li> <a class="navigation__sub__columns__list-link caption js-drawer-link" href="https://research.google/research-areas/human-computer-interaction-and-visualization/" > Human-Computer Interaction and Visualization </a> </li> </ul> </div> <div class="navigation__sub__columns__mobile"> <button class="glue-header__link js-sub-nav-target" data-panel="nested" role="menuitem" aria-haspopup="true"> Science, AI &amp; Society <span class="icon icon--caret"></span> 
</button> <div class="navigation__nested-sub js-sub-nav-parent"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile" role="menuitem" aria-haspopup="true"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Science, AI &amp; Society <span class="sr-text">menu</span> </button> <hr/> </div> <ul> <li role="menuitem"> <a href="https://research.google/research-areas/climate-and-sustainability/" class="navigation__sub__columns__mobile__link" > Climate &amp; Sustainability <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/economics-and-electronic-commerce/" class="navigation__sub__columns__mobile__link" > Economics &amp; Electronic Commerce <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/education-innovation/" class="navigation__sub__columns__mobile__link" > Education Innovation <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/general-science/" class="navigation__sub__columns__mobile__link" > General Science <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/health-bioscience/" class="navigation__sub__columns__mobile__link" > Health &amp; Bioscience <span> </span> </a> </li> <li role="menuitem"> <a href="https://research.google/research-areas/human-computer-interaction-and-visualization/" class="navigation__sub__columns__mobile__link" > Human-Computer Interaction and Visualization <span> </span> </a> </li> </ul> </div> </div> </li> </ul> </div> </div></div> </div> </div> </li> <li class="glue-header__item js-sub-nav-parent --parent" data-gt-primary="Our work" > <button class="glue-header__link js-sub-nav-target" aria-haspopup="true" aria-expanded="false" > <span class=""> Our work <span class="icon icon--caret"></span> </span> </button> <div class="navigation__sub js-sub-nav" role="menu"> <div 
class="navigation__sub__container"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Our work <span class="sr-text">menu</span> </button> <hr/> </div> <div class="block-nav_drawer_columns_content"> <div class="navigation__sub--content" data-gt-secondary=""> <div class="navigation__sub__wrapper"> <ul class="navigation__sub__columns"> <li data-gt-secondary="Projects"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> Projects </h2> <p class="navigation__sub__columns__description caption">We regularly open-source projects with the broader research community and apply our developments to Google products.</p> <a href="https://research.google/resources/our-projects/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Projects</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/resources/our-projects/" > Projects </a> </div> </li> <li data-gt-secondary="Publications"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> Publications </h2> <p class="navigation__sub__columns__description caption">Publishing our work allows us to share ideas and work collaboratively to advance the field of computer science.</p> <a href="https://research.google/pubs/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Publications</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/pubs/" > Publications </a> </div> </li> <li data-gt-secondary="Resources"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> 
Resources </h2> <p class="navigation__sub__columns__description caption">We make products, tools, and datasets available to everyone with the goal of building a more collaborative ecosystem.</p> <a href="https://research.google/resources/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Resources</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/resources/" > Resources </a> </div> </li> </ul> </div> </div> </div> </div> </div> </li> <li class="glue-header__item js-sub-nav-parent --parent" data-gt-primary="Programs &amp; events" > <button class="glue-header__link js-sub-nav-target" aria-haspopup="true" aria-expanded="false" > <span class=""> Programs &amp; events <span class="icon icon--caret"></span> </span> </button> <div class="navigation__sub js-sub-nav" role="menu"> <div class="navigation__sub__container"> <div class="navigation__sub__mobile-heading"> <button class="glue-header__link js-sub-nav-close-mobile"> <span class="sr-text">Back to</span> <span class="icon icon--caret"></span> Programs &amp; events <span class="sr-text">menu</span> </button> <hr/> </div> <div class="block-nav_drawer_columns_content"> <div class="navigation__sub--content" data-gt-secondary="Shaping the future, together."> <div class="navigation__sub__wrapper"> <div class="navigation__sub__heading"> <h2 class="headline-3">Shaping the future, together.</h2> <a href="https://research.google/programs-and-events/" class="js-drawer-link" > Collaborate with us </a> </div> <ul class="navigation__sub__columns"> <li data-gt-secondary="Student programs"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> Student programs </h2> <p class="navigation__sub__columns__description caption">Supporting the next generation of researchers through a wide range of programming.</p> <a 
href="https://research.google/programs-and-events/student-engagement/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Student programs</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/programs-and-events/student-engagement/" > Student programs </a> </div> </li> <li data-gt-secondary="Faculty programs"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> Faculty programs </h2> <p class="navigation__sub__columns__description caption">Participating in the academic research community through meaningful engagement with university faculty.</p> <a href="https://research.google/programs-and-events/faculty-engagement/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Faculty programs</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/programs-and-events/faculty-engagement/" > Faculty programs </a> </div> </li> <li data-gt-secondary="Conferences &amp; events"> <div class="navigation__sub__columns__desktop"> <h2 class="headline-6 navigation__sub__columns__heading"> Conferences &amp; events </h2> <p class="navigation__sub__columns__description caption">Connecting with the broader research community through events is essential for creating progress in every aspect of our work.</p> <a href="https://research.google/conferences-and-events/" class="glue-inline-link js-drawer-link" > <span class="sr-text">Learn more about our Conferences &amp; events</span> <span aria-hidden="true">Learn more</span> </a> </div> <div class="navigation__sub__columns__mobile"> <a class="glue-header__link" href="https://research.google/conferences-and-events/" > Conferences &amp; events </a> </div> </li> </ul> <div class="navigation__sub__cta"> 
<a class="glue-button glue-button--high-emphasis js-drawer-link" href="https://research.google/programs-and-events/" target="_blank" rel="noreferrer noopener">
  Collaborate with us
</a>
<!-- Closes the CTA wrapper and the five enclosing sub-navigation containers. -->
</div> </div> </div> </div> </div> </div>
</li>
<li class="glue-header__item" data-gt-primary="Careers">
  <a class="glue-header__link" href="https://research.google/careers/">
    <span class=""> Careers </span>
  </a>
</li>
<li class="glue-header__item" data-gt-primary="Blog">
  <a class="glue-header__link" href="https://research.google/blog/">
    <span class=""> Blog </span>
  </a>
</li>
</ul>
</nav>
</div>
<!-- search (hide on search page) -->
<div class="glue-header__search js-header-search">
  <div class="glue-header__search__input">
    <div class="search-input" data-type="header">
      <!-- A placeholder is not an accessible label, so the field is named explicitly. -->
      <input type="search" class="caption --empty-search js-search-bar js-gt-search-input" placeholder="Search" aria-label="Search">
      <!-- Icon-only buttons: the icons are aria-hidden, so each button needs its own name. -->
      <button class="search-input__button --search js-gt-search-btn" aria-label="Search">
        <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--18px">
          <use href="/gr/static/assets/icons/glue-icons.svg#search"></use>
        </svg>
      </button>
      <button class="search-input__button --clear" aria-label="Clear search">
        <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--18px">
          <use href="/gr/static/assets/icons/glue-icons.svg#close"></use>
        </svg>
      </button>
    </div>
  </div>
  <!-- Header search toggle: carries both a search and a close icon; the visually hidden
       text supplies the button's name. -->
  <button class="glue-header__search__btn js-header-search-btn">
    <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--24px search">
      <use href="/gr/static/assets/icons/glue-icons.svg#search"></use>
    </svg>
    <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--24px close">
      <use href="/gr/static/assets/icons/glue-icons.svg#close"></use>
    </svg>
    <span class="sr-text js-header-search-sr-text">Search</span>
  </button>
</div>
</div>
</div>
<div class="glue-header__drawer-backdrop">
  <div class="glue-header__mobile_close">
    <button class="glue-header__drawer-toggle-btn js-mobile-nav-close" aria-label="Close 
the navigation drawer"> <svg class="glue-icon glue-icon--24px" role="presentation" aria-hidden="true"> <use href="/gr/static/assets/icons/glue-icons.svg#close"></use> </svg> </button> </div> </div> </header> <main id="page-content"> <div class="research-area-detail"> <section class="basic-hero bhoig --theme-dark " data-gt-id="basic_hero" data-gt-component-name=""> <div class="glue-page"> <div class="glue-grid"> <div class="bhoig__image-wrapper glue-grid__col--span-2 glue-grid__col--span-3-md glue-grid__col--span-2-lg"> <div class="bhoig__image-bg" style="background-color: var(--glue-grey-900); "> <span class="icon icon--speech-processing" style="" ></span> </div> </div> <div class="bhoig__breadcrumb-wrapper glue-grid__col--span-10 glue-grid__col--span-9-md glue-grid__col--span-10-lg"> <nav class="glue-breadcrumbs" aria-label="Breadcrumbs"> <ol class="glue-breadcrumbs__list"> <li class="glue-breadcrumbs__item"> <a class="glue-breadcrumbs__link attribution" href="/">Home</a> <svg role="presentation" aria-hidden="true" class="glue-icon "> <use href="/gr/static/assets/icons/glue-icons.svg#chevron-right"></use> </svg> </li> <li class="glue-breadcrumbs__item"> <a class="glue-breadcrumbs__link attribution" href="/research-areas/">Research areas</a> <svg role="presentation" aria-hidden="true" class="glue-icon "> <use href="/gr/static/assets/icons/glue-icons.svg#chevron-right"></use> </svg> </li> </ol> </nav> </div> <h1 class="headline-1 bhoig__headline glue-grid__col--span-10 glue-grid__col--span-9-md glue-grid__col--span-10-lg">Speech Processing</h1> <div class="basic-hero__description bhoig__description glue-grid__col--span-10 glue-grid__col--span-9-md glue-grid__col--span-10-lg"> <p data-block-key="sjdkk">The research goal for speech at Google aligns with our company mission: to organize the world’s information and make it universally accessible and useful. 
Our pioneering research work in speech processing has enabled us to build automatic speech recognition (ASR) and text-to-speech (TTS) systems that are used across Google products, with support for more than a hundred language varieties spoken across the globe. From Gboard dictation to transcriptions of voice notes, from YouTube captions to team meetings without language barriers, and from Google Maps speaking directions aloud to Google Assistant reading the news, Google’s speech research has unparalleled reach and impact. We aim to solve speech for everyone, everywhere – and work to further improve quality, speed and versatility across all kinds of speech. We're also committed to expanding our language coverage, and have <a href="https://blog.google/technology/ai/ways-ai-is-scaling-helpful/" target="_blank" rel="noopener noreferrer">set a moonshot goal to build speech technologies</a> for 1,000 languages.</p><p data-block-key="e064g">Google's speech research efforts push the state-of-the-art on architectures and algorithms used across areas like speech recognition, text-to-speech synthesis, keyword spotting, speaker recognition, and language identification. The systems we build are deployed on servers in Google’s data centers but also increasingly on-device. The team has a passion for research that leads to product advances for the billions of users that use speech in Google products today. We also release academic publications and open-source projects for the broader research community to leverage.</p><p data-block-key="3a850">Our speech technologies are embedded in products like the Assistant, Search, Gboard, Translate, Maps, YouTube, Cloud, and many more. Thanks to close collaborations with product teams, we are in a unique position to deliver user-centric research. Our researchers can conduct live experiments to test and benchmark new algorithms directly in a realistic controlled environment. 
Whether these are algorithmic improvements or user experience and human-computer interaction studies, we focus on solving real problems with real impact on users.</p><p data-block-key="8j506">We value our user diversity, and have made it a priority to deliver the best performance to every language and language variety. Today, our speech systems operate in more than 130 language varieties, and we continue to expand our reach. The challenges of internationalizing at scale are immense and rewarding. We are breaking new ground by deploying speech technologies that help people communicate, access information online, and share their knowledge – all in their language. And combined with the unprecedented translation capabilities of Google Translate, we are also at the forefront of research in speech-to-speech translation and one step closer to a universal translator.</p> </div> <div class="bhoig__cta glue-grid__col--span-10 glue-grid__col--span-9-md glue-grid__col--span-10-lg"> </div> </div> </div> </section> <section class="offset-two-up"> <div class="glue-page glue-grid"> <div class="offset-two-up__left-col glue-grid__col glue-grid__col--span-4-sm glue-grid__col--span-12-md glue-grid__col--span-3-lg"> <h2 class="offset-two-up__headline headline-3">Recent Publications</h2> <div class="offset-two-up__cta-below-description"> <a class="glue-button glue-button--medium-emphasis" href="https://research.google/pubs/?category=speech-processing" > <span class="js-gt-item-id">See More</span> </a> </div> </div> <div class="glue-grid__col glue-grid__col--span-4-sm glue-grid__col--span-12-md glue-grid__col--span-9-lg"> <div class="publications-list --theme- row-card-list" data-hot-swap="pub-list" data-gt-id="publications_list" data-gt-component-name=""> <div class="row-card"> <div class="row-card__container"> <div class="row-card__body"> <a class="row-card__heading headline-6 glue-link" 
href=https://research.google/pubs/nomad-unsupervised-learning-of-perceptual-embeddings-for-speech-enhancement-and-non-matching-reference-audio-quality-assessment/ > NOMAD: Unsupervised Learning of Perceptual Embeddings for Speech Enhancement and Non-matching Reference Audio Quality Assessment </a> <div class="row-card__subheading"> <div class="row-card__subheading__item extra-small-text"> Alessandro Ragano </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Andrew Hines </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/janskoglund/"> Jan Skoglund </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> ICASSP 2024 (to appear) </div> </div> </div> <div class="row-card__cta headline-6"> <div class="glue-tooltip" data-glue-tooltip-auto-position="false"> <button class="glue-button glue-button--low-emphasis glue-tooltip__trigger" aria-describedby=tooltip-contentthis-paper-presents-nomad-non-mat tabindex=0 > <span class="js-gt-item-id">Preview</span> </button> <span id="tooltip-contentthis-paper-presents-nomad-non-mat" class="glue-tooltip__content" role="tooltip"> <span data-tooltip-type="simple"> Preview abstract </span> <span data-tooltip-type="rich"> <span class="glue-tooltip__body">This paper presents NOMAD (Non-Matching Audio Distance), a differentiable perceptual similarity metric that measures the distance of a degraded signal against non-matching references. The proposed method is based on learning deep feature embeddings via a triplet loss guided by the Neurogram Similarity Index Measure (NSIM) to capture degradation intensity. During inference, the similarity score between any two audio samples is computed through Euclidean distance of their embeddings. 
NOMAD is fully unsupervised and can be used in general perceptual audio tasks for audio analysis e.g. quality assessment and generative tasks such as speech enhancement and speech synthesis. The proposed method is evaluated with 3 tasks. Ranking degradation intensity, predicting speech quality, and as a loss function for speech enhancement. Results indicate NOMAD outperforms other non-matching reference approaches in both ranking degradation intensity and quality assessment, exhibiting competitive performance with full-reference audio metrics. NOMAD demonstrates a promising technique that mimics human capabilities in assessing audio quality with non-matching references to learn perceptual embeddings without the need for human-generated labels.</span> <a class="glue-button glue-button--low-emphasis" href="https://research.google/pubs/nomad-unsupervised-learning-of-perceptual-embeddings-for-speech-enhancement-and-non-matching-reference-audio-quality-assessment/" > <span class="js-gt-item-id">View details</span> </a> </span> </span> </div> </div> </div> </div> <div class="row-card"> <div class="row-card__container"> <div class="row-card__body"> <a class="row-card__heading headline-6 glue-link" href=https://research.google/pubs/spoken-question-answering-and-speech-continuation-using-spectrogram-powered-llm/ > Spoken Question Answering and Speech Continuation Using Spectrogram-Powered LLM </a> <div class="row-card__subheading"> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/michelletadmorramanovich/"> Michelle Tadmor Ramanovich </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Ehud Rivlin </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/juliansalazar/"> Julian Salazar </a> </div> <div class="row-card__subheading__spacer"></div> <div 
class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/eliya-nachmani/"> Eliya Nachmani </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/rjskerryryan/"> RJ Skerry-Ryan </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/sorooshmariooryad/"> Soroosh Mariooryad </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Chulayuth Asawaroengchai </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Roy Hirsch </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Alon Levkovitch </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> ICLR (2024) </div> </div> </div> <div class="row-card__cta headline-6"> <div class="glue-tooltip" data-glue-tooltip-auto-position="false"> <button class="glue-button glue-button--low-emphasis glue-tooltip__trigger" aria-describedby=tooltip-contentwe-present-spectron-a-novel-appro tabindex=0 > <span class="js-gt-item-id">Preview</span> </button> <span id="tooltip-contentwe-present-spectron-a-novel-appro" class="glue-tooltip__content" role="tooltip"> <span data-tooltip-type="simple"> Preview abstract </span> <span data-tooltip-type="rich"> <span class="glue-tooltip__body">We present Spectron, a novel approach to adapting pre-trained large language models (LLMs) to perform spoken question answering (QA) and speech continuation. By endowing the LLM with a pre-trained speech encoder, our model becomes able to take speech inputs and generate speech outputs. 
The entire system is trained end-to-end and operates directly on spectrograms, simplifying our architecture. Key to our approach is a training objective that jointly supervises speech recognition, text continuation, and speech synthesis using only paired speech-text pairs, enabling a ‘cross-modal’ chain-of-thought within a single decoding pass. Our method surpasses existing spoken language models in speaker preservation and semantic coherence. Furthermore, the proposed model improves upon direct initialization in retaining the knowledge of the original LLM as demonstrated through spoken QA datasets. We release our audio samples and spoken QA dataset via our website.</span> <a class="glue-button glue-button--low-emphasis" href="https://research.google/pubs/spoken-question-answering-and-speech-continuation-using-spectrogram-powered-llm/" > <span class="js-gt-item-id">View details</span> </a> </span> </span> </div> </div> </div> </div> <div class="row-card"> <div class="row-card__container"> <div class="row-card__body"> <a class="row-card__heading headline-6 glue-link" href=https://research.google/pubs/now-you-see-me-now-you-dont-poverty-of-the-stimulus-problems-and-arbitrary-correspondences-in-end-to-end-speech-models/ > Now You See Me, Now You Don&#x27;t: &#x27;Poverty of the Stimulus&#x27; Problems and Arbitrary Correspondences in End-to-End Speech Models </a> <div class="row-card__subheading"> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/daanvanesch/"> Daan van Esch </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Proceedings of the Second Workshop on Computation and Written Language (CAWL) 2024 </div> </div> </div> <div class="row-card__cta headline-6"> <div class="glue-tooltip" data-glue-tooltip-auto-position="false"> <button class="glue-button glue-button--low-emphasis glue-tooltip__trigger" 
aria-describedby=tooltip-contentend-to-end-models-for-speech-recog tabindex=0 > <span class="js-gt-item-id">Preview</span> </button> <span id="tooltip-contentend-to-end-models-for-speech-recog" class="glue-tooltip__content" role="tooltip"> <span data-tooltip-type="simple"> Preview abstract </span> <span data-tooltip-type="rich"> <span class="glue-tooltip__body">End-to-end models for speech recognition and speech synthesis have many benefits, but we argue they also face a unique set of challenges not encountered in conventional multi-stage hybrid systems, which relied on the explicit injection of linguistic knowledge through resources such as phonemic dictionaries and verbalization grammars. These challenges include handling words with unusual grapheme-to-phoneme correspondences, converting between written forms like ‘12’ and spoken forms such as ‘twelve’, and contextual disambiguation of homophones or homographs. We describe the mitigation strategies that have been used for these problems in end-to-end systems, either implicitly or explicitly, and call out that the most commonly used mitigation techniques are likely incompatible with newly emerging approaches that use minimal amounts of supervised audio training data. 
We review best-of-both-world approaches that allow the use of end-to-end models combined with traditional linguistic resources, which we show are increasingly straightforward to create at scale, and close with an optimistic outlook for bringing speech technologies to many more languages by combining these strands of research.</span> <a class="glue-button glue-button--low-emphasis" href="https://research.google/pubs/now-you-see-me-now-you-dont-poverty-of-the-stimulus-problems-and-arbitrary-correspondences-in-end-to-end-speech-models/" > <span class="js-gt-item-id">View details</span> </a> </span> </span> </div> </div> </div> </div> <div class="row-card"> <div class="row-card__container"> <div class="row-card__body"> <a class="row-card__heading headline-6 glue-link" href=https://research.google/pubs/streamvc-real-time-low-latency-voice-conversion/ > StreamVC: Real-Time Low-Latency Voice Conversion </a> <div class="row-card__subheading"> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/georgesung/"> George Sung </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/yurykartynnik/"> Yury Kartynnik </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/yangyang/"> Yang Yang </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/author38919/"> Matthias Grundmann </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/107348/"> Pen Li </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Xing Li </div> <div 
class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Jiuqiang Tang </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> ICASSP 2024 (2024) </div> </div> </div> <div class="row-card__cta headline-6"> <div class="glue-tooltip" data-glue-tooltip-auto-position="false"> <button class="glue-button glue-button--low-emphasis glue-tooltip__trigger" aria-describedby=tooltip-contentwe-present-streamvc-a-streaming-v tabindex=0 > <span class="js-gt-item-id">Preview</span> </button> <span id="tooltip-contentwe-present-streamvc-a-streaming-v" class="glue-tooltip__content" role="tooltip"> <span data-tooltip-type="simple"> Preview abstract </span> <span data-tooltip-type="rich"> <span class="glue-tooltip__body">We present StreamVC, a streaming voice conversion solution that preserves the content and prosody of any source speech while matching the voice timbre from any target speech. Unlike previous approaches, StreamVC produces the resulting waveform at low latency from the input signal even on a mobile platform, making it applicable to real-time communication scenarios like calls and video conferencing, and addressing use cases such as voice anonymization in these scenarios. Our design leverages the architecture and training strategy of the SoundStream neural audio codec for lightweight high-quality speech synthesis. 
We demonstrate the feasibility of learning soft speech units causally, as well as the effectiveness of supplying whitened fundamental frequency information to improve pitch stability without leaking the source timbre information.</span> <a class="glue-button glue-button--low-emphasis" href="https://research.google/pubs/streamvc-real-time-low-latency-voice-conversion/" > <span class="js-gt-item-id">View details</span> </a> </span> </span> </div> </div> </div> </div> <div class="row-card"> <div class="row-card__container"> <div class="row-card__body"> <a class="row-card__heading headline-6 glue-link" href=https://research.google/pubs/automatic-speech-recognition-of-conversational-speech-in-individuals-with-disordered-speech/ > Automatic Speech Recognition of Conversational Speech in Individuals with Disordered Speech </a> <div class="row-card__subheading"> <div class="row-card__subheading__item extra-small-text"> Antoine Desjardins </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Katie Seaver </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/108145/"> Pan-Pan Jiang </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/philipnelson/"> Philip Q Nelson </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/107565/"> Jimmy Tobin </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Richard Cave </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Bob MacDonald </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item 
extra-small-text"> Jordan Green </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Rus Heywood </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Journal of Speech, Language, and Hearing Research (2024) (to appear) </div> </div> </div> <div class="row-card__cta headline-6"> <div class="glue-tooltip" data-glue-tooltip-auto-position="false"> <button class="glue-button glue-button--low-emphasis glue-tooltip__trigger" aria-describedby=tooltip-contentpurpose-this-study-examines-the-e tabindex=0 > <span class="js-gt-item-id">Preview</span> </button> <span id="tooltip-contentpurpose-this-study-examines-the-e" class="glue-tooltip__content" role="tooltip"> <span data-tooltip-type="simple"> Preview abstract </span> <span data-tooltip-type="rich"> <span class="glue-tooltip__body">Purpose: This study examines the effectiveness of automatic speech recognition (ASR) for individuals with speech disorders, addressing the gap in performance between read and conversational ASR. We analyze the factors influencing this disparity and the effect of speech mode-specific training on ASR accuracy. Method: Recordings of read and conversational speech from 27 individuals with various speech disorders were analyzed using both (1) one speaker-independent ASR system trained and optimized for typical speech and (2) multiple ASR models that were personalized to the speech of the participants with disordered speech. Word Error Rates (WERs) were calculated for each speech mode, read vs conversational, and subject. Linear mixed-effect models were used to assess the impact of speech mode and disorder severity on ASR accuracy. We investigated nine variables, classified as technical, linguistic, or speech impairment factors, for their potential influence on the performance gap. 
Results: We found a significant performance gap between read and conversational speech in both personalized and unadapted ASR models. Speech impairment severity notably impacted recognition accuracy in unadapted models for both speech modes and in personalized models for read speech. Linguistic attributes of utterances were the most influential on accuracy, though atypical speech characteristics also played a role. Including conversational speech samples in model training notably improved recognition accuracy. Conclusions: We observed a significant performance gap in ASR accuracy between read and conversational speech for individuals with speech disorders. This gap was largely due to the linguistic complexity and unique characteristics of speech disorders in conversational speech. Training personalized ASR models using conversational speech significantly improved recognition accuracy, demonstrating the importance of domain-specific training and highlighting the need for further research into ASR systems capable of handling disordered conversational speech effectively.</span> <a class="glue-button glue-button--low-emphasis" href="https://research.google/pubs/automatic-speech-recognition-of-conversational-speech-in-individuals-with-disordered-speech/" > <span class="js-gt-item-id">View details</span> </a> </span> </span> </div> </div> </div> </div> <div class="row-card"> <div class="row-card__container"> <div class="row-card__body"> <a class="row-card__heading headline-6 glue-link" href=https://research.google/pubs/multimodal-language-identification/ > Multimodal Modeling for Spoken Language Identification </a> <div class="row-card__subheading"> <div class="row-card__subheading__item extra-small-text"> Yu Zhang </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/veraaxelrod/"> Vera Axelrod </a> </div> <div class="row-card__subheading__spacer"></div> <div 
class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/105376/"> Ankur Bapna </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/daanvanesch/"> Daan van Esch </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/108512/"> Shikhar Vashishth </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/jasonriesa/"> Jason Riesa </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/parthatalukdar/"> Partha Talukdar </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Wei Han </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/106369/"> Sandy Ritchie </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> <a class="row-card__small-link" href="/people/min-ma/"> Min Ma </a> </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Shikhar Bharadwaj </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Sriram (Sri) Ganapathy </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Sid Dalmia </div> <div class="row-card__subheading__spacer"></div> <div class="row-card__subheading__item extra-small-text"> Proceedings of 2024 IEEE International Conference on Acoustics, Speech and Signal Processing 
(ICASSP 2024) (2024) </div> </div> </div> <div class="row-card__cta headline-6"> <div class="glue-tooltip" data-glue-tooltip-auto-position="false"> <button class="glue-button glue-button--low-emphasis glue-tooltip__trigger" aria-describedby=tooltip-contentspoken-language-identification-ref tabindex=0 > <span class="js-gt-item-id">Preview</span> </button> <span id="tooltip-contentspoken-language-identification-ref" class="glue-tooltip__content" role="tooltip"> <span data-tooltip-type="simple"> Preview abstract </span> <span data-tooltip-type="rich"> <span class="glue-tooltip__body">Spoken language identification refers to the task of automatically predicting the spoken language in a given utterance. Conventionally, it is modeled as a speech-based language identification task. Prior techniques have been constrained to a single modality; however in the case of video data there is a wealth of other metadata that may be beneficial for this task. In this work, we propose MuSeLI, a Multimodal Spoken Language Identification method, which delves into the use of various metadata sources to enhance language identification. Our study reveals that metadata such as video title, description and geographic location provide substantial information to identify the spoken language of the multimedia recording. We conduct experiments using two diverse public datasets of YouTube videos, and obtain state-of-the-art results on the language identification task. 
We additionally conduct an ablation study that describes the distinct contribution of each modality for language recognition.</span> <a class="glue-button glue-button--low-emphasis" href="https://research.google/pubs/multimodal-language-identification/" > <span class="js-gt-item-id">View details</span> </a> </span> </span> </div> </div> </div> </div> </div> </div> </div> </section> <section class="offset-two-up --theme-dark both" data-gt-id="offset_two_up" data-gt-component-name=""> <div class="glue-page glue-grid"> <div class="offset-two-up__left-col glue-grid__col glue-grid__col--span-4-sm glue-grid__col--span-12-md glue-grid__col--span-3-lg"> <h2 class="offset-two-up__headline headline-3">Some of our teams</h2> </div> <div class="glue-grid__col glue-grid__col--span-4-sm glue-grid__col--span-12-md glue-grid__col--span-9-lg"> <ul class="card-stack--small nested-glue-grid-override" data-gt-id="team_small_card_stack" data-gt-component-name="Team - Small Card Stack"> <li class="glue-grid__col glue-grid__col--span-6-md glue-grid__col--span-4-sm"> <a class="glue-card not-glue glue-card--small" href="/teams/africa-team/"> <div class="glue-card__inner"> <div class="glue-card__content"> <div class="content-wrapper"> <p class="glue-headline body js-gt-item-id"> Africa team </p> </div> <div class="glue-card__cta glue-card__cta--arrow"> <span class="glue-button glue-button--low-emphasis glue-button--icon"> <svg role="presentation" aria-hidden="true" class="glue-icon "> <use href="/gr/static/assets/icons/glue-icons.svg#arrow-forward"></use> </svg> </span> </div> </div> </div> </a> </li> <li class="glue-grid__col glue-grid__col--span-6-md glue-grid__col--span-4-sm"> <a class="glue-card not-glue glue-card--small" href="/teams/health/"> <div class="glue-card__inner"> <div class="glue-card__content"> <div class="content-wrapper"> <p class="glue-headline body js-gt-item-id"> Health </p> </div> <div class="glue-card__cta glue-card__cta--arrow"> <span class="glue-button 
glue-button--low-emphasis glue-button--icon"> <svg role="presentation" aria-hidden="true" class="glue-icon "> <use href="/gr/static/assets/icons/glue-icons.svg#arrow-forward"></use> </svg> </span> </div> </div> </div> </a> </li> <li class="glue-grid__col glue-grid__col--span-6-md glue-grid__col--span-4-sm"> <a class="glue-card not-glue glue-card--small" href="/teams/impact-driven-research-innovation-and-moonshots/"> <div class="glue-card__inner"> <div class="glue-card__content"> <div class="content-wrapper"> <p class="glue-headline body js-gt-item-id"> Impact-Driven Research, Innovation and Moonshots </p> </div> <div class="glue-card__cta glue-card__cta--arrow"> <span class="glue-button glue-button--low-emphasis glue-button--icon"> <svg role="presentation" aria-hidden="true" class="glue-icon "> <use href="/gr/static/assets/icons/glue-icons.svg#arrow-forward"></use> </svg> </span> </div> </div> </div> </a> </li> <li class="glue-grid__col glue-grid__col--span-6-md glue-grid__col--span-4-sm"> <a class="glue-card not-glue glue-card--small" href="/teams/perception/"> <div class="glue-card__inner"> <div class="glue-card__content"> <div class="content-wrapper"> <p class="glue-headline body js-gt-item-id"> Perception </p> </div> <div class="glue-card__cta glue-card__cta--arrow"> <span class="glue-button glue-button--low-emphasis glue-button--icon"> <svg role="presentation" aria-hidden="true" class="glue-icon "> <use href="/gr/static/assets/icons/glue-icons.svg#arrow-forward"></use> </svg> </span> </div> </div> </div> </a> </li> </ul> </div> </div> </section> <section class="banner --theme-light" data-gt-id="banner" data-gt-component-name="None"> <div class="banner__wrapper glue-page glue-grid"> <div class="banner__copy glue-grid__col glue-grid__col--span-4-sm glue-grid__col--span-6-md"> <h2 class="banner__headline headline-3">Join us</h2> <p class="banner__body-copy body">We&#x27;re always looking for more talented, passionate people.</p> <a class="glue-button 
glue-button--medium-emphasis" href="https://research.google/careers/" > <span class="js-gt-item-id">See opportunities</span> </a> </div> <div class="glue-grid__col glue-grid__col--span-0-sm glue-grid__col--span-1"></div> <div class="banner__image glue-grid__col glue-grid__col--span-4-sm glue-grid__col--span-5-md"> <img src="https://storage.googleapis.com/gweb-research2023-media/images/Careers.original.jpg" alt="Careers" /> </div> </div> </section> </main> <footer class="glue-footer"> <div class="glue-page"> <section class="glue-social"> <div class="glue-social__group glue-social--monochrome"> <p class="glue-social__title glue-social__title--inline"> Follow us </p> <nav class="js-gt-follow-us-wrapper" aria-label="Social media links"> <ul class="glue-social__list" role="list"> <li class="glue-social__item"> <a class="glue-social__link" href="https://twitter.com/GoogleAI" title="Follow us on x" target="_blank" rel="noopener" data-gt-method="x" > <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--social glue-icon--24px"> <use href="/gr/static/assets/icons/twitter-x.svg#twitter-x"></use> </svg> </a> </li> <li class="glue-social__item"> <a class="glue-social__link" href="https://www.linkedin.com/showcase/googleresearch/" title="Follow us on linkedin" target="_blank" rel="noopener" data-gt-method="linkedin" > <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--social glue-icon--24px"> <use href="/gr/static/assets/icons/glue-icons.svg#post-linkedin"></use> </svg> </a> </li> <li class="glue-social__item"> <a class="glue-social__link" href="https://www.youtube.com/c/GoogleResearch" title="Follow us on youtube" target="_blank" rel="noopener" data-gt-method="youtube" > <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--social glue-icon--24px"> <use href="/gr/static/assets/icons/glue-icons.svg#video-youtube"></use> </svg> </a> </li> <li class="glue-social__item"> <a class="glue-social__link" 
href="https://github.com/google-research" title="Follow us on github" target="_blank" rel="noopener" data-gt-method="github" > <svg role="presentation" aria-hidden="true" class="glue-icon glue-icon--social glue-icon--24px"> <use href="/gr/static/assets/icons/github.svg#github"></use> </svg> </a> </li> </ul> </nav> </div> </section> </div> <div class="glue-fullbleed"></div> <section class="glue-page"> <nav class="glue-footer__global" aria-label="Footer resource links"> <div class="glue-footer__logo"> <a href="https://www.google.com" title="Google" class="glue-footer__link"> <svg role="presentation" aria-hidden="true" class="glue-icon glue-footer__logo-img"> <use href="/gr/static/assets/icons/glue-icons.svg#google-solid-logo"></use> </svg> </a> </div> <ul class="glue-footer__global-links glue-no-bullet js-gt-global-nav-wrapper" role="list"> <li class="glue-footer__global-links-list-item" data-gt-primary="About Google"> <a class="glue-footer__link" href="https://about.google/" target="_blank" rel="noopener"> About Google </a> </li> <li class="glue-footer__global-links-list-item" data-gt-primary="Google Products"> <a class="glue-footer__link" href="https://about.google/intl/en/products/" target="_blank" rel="noopener"> Google Products </a> </li> <li class="glue-footer__global-links-list-item" data-gt-primary="Privacy"> <a class="glue-footer__link" href="https://policies.google.com/privacy" target="_blank" rel="noopener"> Privacy </a> </li> <li class="glue-footer__global-links-list-item" data-gt-primary="Terms"> <a class="glue-footer__link" href="https://policies.google.com/terms" target="_blank" rel="noopener"> Terms </a> </li> </ul> <ul class="glue-footer__global-links glue-footer__global-links--extra glue-no-bullet" role="list"> <li class="glue-footer__global-links-list-item glue-footer__global-links-list-item--extra"> <a class="glue-footer__link" href="https://support.google.com/?hl=en"> <svg role="presentation" aria-hidden="true" 
class="glue-icon glue-icon--24px glue-icon--footer-help"> <use href="/gr/static/assets/icons/glue-icons.svg#help"></use> </svg> Help </a> </li> <li class="glue-footer__global-links-list-item glue-footer__global-links-list-item--extra"> <button class="glue-footer__link google-feedback js-feedback-button" data-product-id="5137383" > Submit feedback </button> </li> </ul> </nav> </section> </footer> <script src="https://www.gstatic.com/glue/v27_1/material-components-web.min.js"></script> <script src="https://www.youtube.com/player_api"></script> <script type="text/javascript" src="/gr/static/js/googleresearch.js?id=b70549917812130af912601ad763f13e"></script> <script type="text/javascript" src="https://support.google.com/inapp/api.js"></script> <script src="https://www.gstatic.com/glue/cookienotificationbar/cookienotificationbar.min.js" data-glue-cookie-notification-bar-category="2B"> </script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10