Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–11 of 11 results for author: <span class="mathjax">Chodroff, E</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/" role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or 
terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Chodroff, E"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Chodroff%2C+E&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option 
value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Chodroff, E"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08160">arXiv:2409.08160</a> <span> [<a href="https://arxiv.org/pdf/2409.08160">pdf</a>, <a href="https://arxiv.org/format/2409.08160">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Role of Context in Reading Time Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Opedal%2C+A">Andreas Opedal</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a>, <a href="/search/?searchtype=author&query=Wilcox%2C+E+G">Ethan Gotlieb Wilcox</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08160v3-abstract-short" style="display: inline;"> We present a new perspective on how readers integrate context during real-time language comprehension. 
Our proposals build on surprisal theory, which posits that the processing effort of a linguistic unit (e.g., a word) is an affine function of its in-context information content. We first observe that surprisal is only one out of many potential ways that a contextual predictor can be derived from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08160v3-abstract-full').style.display = 'inline'; document.getElementById('2409.08160v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08160v3-abstract-full" style="display: none;"> We present a new perspective on how readers integrate context during real-time language comprehension. Our proposals build on surprisal theory, which posits that the processing effort of a linguistic unit (e.g., a word) is an affine function of its in-context information content. We first observe that surprisal is only one out of many potential ways that a contextual predictor can be derived from a language model. Another one is the pointwise mutual information (PMI) between a unit and its context, which turns out to yield the same predictive power as surprisal when controlling for unigram frequency. Moreover, both PMI and surprisal are correlated with frequency. This means that neither PMI nor surprisal contains information about context alone. In response to this, we propose a technique where we project surprisal onto the orthogonal complement of frequency, yielding a new contextual predictor that is uncorrelated with frequency. Our experiments show that the proportion of variance in reading times explained by context is a lot smaller when context is represented by the orthogonalized predictor. From an interpretability standpoint, this indicates that previous studies may have overstated the role that context has in predicting reading times. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08160v3-abstract-full').style.display = 'none'; document.getElementById('2409.08160v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19363">arXiv:2406.19363</a> <span> [<a href="https://arxiv.org/pdf/2406.19363">pdf</a>, <a href="https://arxiv.org/ps/2406.19363">ps</a>, <a href="https://arxiv.org/format/2406.19363">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Tradition or Innovation: A Comparison of Modern ASR Methods for Forced Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Rousso%2C+R">Rotem Rousso</a>, <a href="/search/?searchtype=author&query=Cohen%2C+E">Eyal Cohen</a>, <a href="/search/?searchtype=author&query=Keshet%2C+J">Joseph Keshet</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19363v1-abstract-short" style="display: inline;"> Forced alignment (FA) plays a key role in speech research 
through the automatic time alignment of speech signals with corresponding text transcriptions. Despite the move towards end-to-end architectures for speech technology, FA is still dominantly achieved through a classic GMM-HMM acoustic model. This work directly compares alignment performance from leading automatic speech recognition (ASR) me… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19363v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19363v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19363v1-abstract-full" style="display: none;"> Forced alignment (FA) plays a key role in speech research through the automatic time alignment of speech signals with corresponding text transcriptions. Despite the move towards end-to-end architectures for speech technology, FA is still dominantly achieved through a classic GMM-HMM acoustic model. This work directly compares alignment performance from leading automatic speech recognition (ASR) methods, WhisperX and Massively Multilingual Speech Recognition (MMS), against a Kaldi-based GMM-HMM system, the Montreal Forced Aligner (MFA). Performance was assessed on the manually aligned TIMIT and Buckeye datasets, with comparisons conducted only on words correctly recognized by WhisperX and MMS. The MFA outperformed both WhisperX and MMS, revealing a shortcoming of modern ASR systems. These findings highlight the need for advancements in forced alignment and emphasize the importance of integrating traditional expertise with modern innovation to foster progress. 
Index Terms: forced alignment, phoneme alignment, word alignment <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19363v1-abstract-full').style.display = 'none'; document.getElementById('2406.19363v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Interspeech 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04289">arXiv:2406.04289</a> <span> [<a href="https://arxiv.org/pdf/2406.04289">pdf</a>, <a href="https://arxiv.org/format/2406.04289">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> What Languages are Easy to Language-Model? 
A Perspective from Learning Probabilistic Regular Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Borenstein%2C+N">Nadav Borenstein</a>, <a href="/search/?searchtype=author&query=Svete%2C+A">Anej Svete</a>, <a href="/search/?searchtype=author&query=Chan%2C+R">Robin Chan</a>, <a href="/search/?searchtype=author&query=Valvoda%2C+J">Josef Valvoda</a>, <a href="/search/?searchtype=author&query=Nowak%2C+F">Franz Nowak</a>, <a href="/search/?searchtype=author&query=Augenstein%2C+I">Isabelle Augenstein</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04289v5-abstract-short" style="display: inline;"> What can large language models learn? By definition, language models (LM) are distributions over strings. Therefore, an intuitive way of addressing the above question is to formalize it as a matter of learnability of classes of distributions over strings. While prior work in this direction focused on assessing the theoretical limits, in contrast, we seek to understand the empirical learnability. U… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04289v5-abstract-full').style.display = 'inline'; document.getElementById('2406.04289v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04289v5-abstract-full" style="display: none;"> What can large language models learn? By definition, language models (LM) are distributions over strings. Therefore, an intuitive way of addressing the above question is to formalize it as a matter of learnability of classes of distributions over strings. 
While prior work in this direction focused on assessing the theoretical limits, in contrast, we seek to understand the empirical learnability. Unlike prior empirical work, we evaluate neural LMs on their home turf-learning probabilistic languages-rather than as classifiers of formal languages. In particular, we investigate the learnability of regular LMs (RLMs) by RNN and Transformer LMs. We empirically test the learnability of RLMs as a function of various complexity parameters of the RLM and the hidden state size of the neural LM. We find that the RLM rank, which corresponds to the size of linear space spanned by the logits of its conditional distributions, and the expected length of sampled strings are strong and significant predictors of learnability for both RNNs and Transformers. Several other predictors also reach significance, but with differing patterns between RNNs and Transformers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04289v5-abstract-full').style.display = 'none'; document.getElementById('2406.04289v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07567">arXiv:2404.07567</a> <span> [<a href="https://arxiv.org/pdf/2404.07567">pdf</a>, <a href="https://arxiv.org/format/2404.07567">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Statistics in Phonetics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tavakoli%2C+S">Shahin Tavakoli</a>, <a href="/search/?searchtype=author&query=Matteo%2C+B">Beatrice Matteo</a>, <a href="/search/?searchtype=author&query=Pigoli%2C+D">Davide Pigoli</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Coleman%2C+J">John Coleman</a>, <a href="/search/?searchtype=author&query=Gubian%2C+M">Michele Gubian</a>, <a href="/search/?searchtype=author&query=Renwick%2C+M+E+L">Margaret E. L. Renwick</a>, <a href="/search/?searchtype=author&query=Sonderegger%2C+M">Morgan Sonderegger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07567v2-abstract-short" style="display: inline;"> Phonetics is the scientific field concerned with the study of how speech is produced, heard and perceived. It abounds with data, such as acoustic speech recordings, neuroimaging data, or articulatory data. 
In this paper, we provide an introduction to different areas of phonetics (acoustic phonetics, sociophonetics, speech perception, articulatory phonetics, speech inversion, sound change, and spee… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07567v2-abstract-full').style.display = 'inline'; document.getElementById('2404.07567v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07567v2-abstract-full" style="display: none;"> Phonetics is the scientific field concerned with the study of how speech is produced, heard and perceived. It abounds with data, such as acoustic speech recordings, neuroimaging data, or articulatory data. In this paper, we provide an introduction to different areas of phonetics (acoustic phonetics, sociophonetics, speech perception, articulatory phonetics, speech inversion, sound change, and speech technology), an overview of the statistical methods for analyzing their data, and an introduction to the signal processing methods commonly applied to speech recordings. A major transition in the statistical modeling of phonetic data has been the shift from fixed effects to random effects regression models, the modeling of curve data (for instance via GAMMs or FDA methods), and the use of Bayesian methods. This shift has been driven in part by the increased focus on large speech corpora in phonetics, which has been driven by machine learning methods such as forced alignment. We conclude by identifying opportunities for future research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07567v2-abstract-full').style.display = 'none'; document.getElementById('2404.07567v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.19509">arXiv:2403.19509</a> <span> [<a href="https://arxiv.org/pdf/2403.19509">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Phonetic Segmentation of the UCLA Phonetics Lab Archive </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Pa%C5%BEon%2C+B">Blaž Pažon</a>, <a href="/search/?searchtype=author&query=Baker%2C+A">Annie Baker</a>, <a href="/search/?searchtype=author&query=Moran%2C+S">Steven Moran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.19509v1-abstract-short" style="display: inline;"> Research in speech technologies and comparative linguistics depends on access to diverse and accessible speech data. 
The UCLA Phonetics Lab Archive is one of the earliest multilingual speech corpora, with long-form audio recordings and phonetic transcriptions for 314 languages (Ladefoged et al., 2009). Recently, 95 of these languages were time-aligned with word-level phonetic transcriptions (Li et… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19509v1-abstract-full').style.display = 'inline'; document.getElementById('2403.19509v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.19509v1-abstract-full" style="display: none;"> Research in speech technologies and comparative linguistics depends on access to diverse and accessible speech data. The UCLA Phonetics Lab Archive is one of the earliest multilingual speech corpora, with long-form audio recordings and phonetic transcriptions for 314 languages (Ladefoged et al., 2009). Recently, 95 of these languages were time-aligned with word-level phonetic transcriptions (Li et al., 2021). Here we present VoxAngeles, a corpus of audited phonetic transcriptions and phone-level alignments of the UCLA Phonetics Lab Archive, which uses the 95-language CMU re-release as our starting point. VoxAngeles also includes word- and phone-level segmentations from the original UCLA corpus, as well as phonetic measurements of word and phone durations, vowel formants, and vowel f0. This corpus enhances the usability of the original data, particularly for quantitative phonetic typology, as demonstrated through a case study of vowel intrinsic f0. We also discuss the utility of the VoxAngeles corpus for general research and pedagogy in crosslinguistic phonetics, as well as for low-resource and multilingual speech technologies. VoxAngeles is free to download and use under a CC-BY-NC 4.0 license. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19509v1-abstract-full').style.display = 'none'; document.getElementById('2403.19509v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at LREC-COLING 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.03608">arXiv:2205.03608</a> <span> [<a href="https://arxiv.org/pdf/2205.03608">pdf</a>, <a href="https://arxiv.org/format/2205.03608">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> UniMorph 4.0: Universal Morphology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Batsuren%2C+K">Khuyagbaatar Batsuren</a>, <a href="/search/?searchtype=author&query=Goldman%2C+O">Omer Goldman</a>, <a href="/search/?searchtype=author&query=Khalifa%2C+S">Salam Khalifa</a>, <a href="/search/?searchtype=author&query=Habash%2C+N">Nizar Habash</a>, <a href="/search/?searchtype=author&query=Kiera%C5%9B%2C+W">Witold Kieraś</a>, <a href="/search/?searchtype=author&query=Bella%2C+G">Gábor Bella</a>, <a href="/search/?searchtype=author&query=Leonard%2C+B">Brian Leonard</a>, <a href="/search/?searchtype=author&query=Nicolai%2C+G">Garrett Nicolai</a>, <a href="/search/?searchtype=author&query=Gorman%2C+K">Kyle Gorman</a>, <a href="/search/?searchtype=author&query=Ate%2C+Y+G">Yustinus Ghanggo Ate</a>, <a 
href="/search/?searchtype=author&query=Ryskina%2C+M">Maria Ryskina</a>, <a href="/search/?searchtype=author&query=Mielke%2C+S+J">Sabrina J. Mielke</a>, <a href="/search/?searchtype=author&query=Budianskaya%2C+E">Elena Budianskaya</a>, <a href="/search/?searchtype=author&query=El-Khaissi%2C+C">Charbel El-Khaissi</a>, <a href="/search/?searchtype=author&query=Pimentel%2C+T">Tiago Pimentel</a>, <a href="/search/?searchtype=author&query=Gasser%2C+M">Michael Gasser</a>, <a href="/search/?searchtype=author&query=Lane%2C+W">William Lane</a>, <a href="/search/?searchtype=author&query=Raj%2C+M">Mohit Raj</a>, <a href="/search/?searchtype=author&query=Coler%2C+M">Matt Coler</a>, <a href="/search/?searchtype=author&query=Samame%2C+J+R+M">Jaime Rafael Montoya Samame</a>, <a href="/search/?searchtype=author&query=Camaiteri%2C+D+S">Delio Siticonatzi Camaiteri</a>, <a href="/search/?searchtype=author&query=Sagot%2C+B">Benoît Sagot</a>, <a href="/search/?searchtype=author&query=Rojas%2C+E+Z">Esaú Zumaeta Rojas</a>, <a href="/search/?searchtype=author&query=Francis%2C+D+L">Didier López Francis</a>, <a href="/search/?searchtype=author&query=Oncevay%2C+A">Arturo Oncevay</a> , et al. (71 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.03608v3-abstract-short" style="display: inline;"> The Universal Morphology (UniMorph) project is a collaborative effort providing broad-coverage instantiated normalized morphological inflection tables for hundreds of diverse world languages. The project comprises two major thrusts: a language-independent feature schema for rich morphological annotation and a type-level resource of annotated data in diverse languages realizing that schema. 
This pa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03608v3-abstract-full').style.display = 'inline'; document.getElementById('2205.03608v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.03608v3-abstract-full" style="display: none;"> The Universal Morphology (UniMorph) project is a collaborative effort providing broad-coverage instantiated normalized morphological inflection tables for hundreds of diverse world languages. The project comprises two major thrusts: a language-independent feature schema for rich morphological annotation and a type-level resource of annotated data in diverse languages realizing that schema. This paper presents the expansions and improvements made on several fronts over the last couple of years (since McCarthy et al. (2020)). Collaborative efforts by numerous linguists have added 67 new languages, including 30 endangered languages. We have implemented several improvements to the extraction pipeline to tackle some issues, e.g. missing gender and macron information. We have also amended the schema to use a hierarchical structure that is needed for morphological phenomena like multiple-argument agreement and case stacking, while adding some missing morphological features to make the schema more inclusive. In light of the last UniMorph release, we also augmented the database with morpheme segmentation for 16 languages. Lastly, this new release makes a push towards inclusion of derivational morphology in UniMorph by enriching the data and annotation schema with instances representing derivational processes from MorphyNet. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03608v3-abstract-full').style.display = 'none'; document.getElementById('2205.03608v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">LREC 2022; The first two authors made equal contributions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.17019">arXiv:2203.17019</a> <span> [<a href="https://arxiv.org/pdf/2203.17019">pdf</a>, <a href="https://arxiv.org/format/2203.17019">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> DeepFry: Identifying Vocal Fry Using Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chernyak%2C+B+R">Bronya R. 
Chernyak</a>, <a href="/search/?searchtype=author&query=Simon%2C+T+B">Talia Ben Simon</a>, <a href="/search/?searchtype=author&query=Segal%2C+Y">Yael Segal</a>, <a href="/search/?searchtype=author&query=Steffman%2C+J">Jeremy Steffman</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Cole%2C+J+S">Jennifer S. Cole</a>, <a href="/search/?searchtype=author&query=Keshet%2C+J">Joseph Keshet</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.17019v2-abstract-short" style="display: inline;"> Vocal fry or creaky voice refers to a voice quality characterized by irregular glottal opening and low pitch. It occurs in diverse languages and is prevalent in American English, where it is used not only to mark phrase finality, but also sociolinguistic factors and affect. Due to its irregular periodicity, creaky voice challenges automatic speech processing and recognition systems, particularly f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.17019v2-abstract-full').style.display = 'inline'; document.getElementById('2203.17019v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.17019v2-abstract-full" style="display: none;"> Vocal fry or creaky voice refers to a voice quality characterized by irregular glottal opening and low pitch. It occurs in diverse languages and is prevalent in American English, where it is used not only to mark phrase finality, but also sociolinguistic factors and affect. Due to its irregular periodicity, creaky voice challenges automatic speech processing and recognition systems, particularly for languages where creak is frequently used. This paper proposes a deep learning model to detect creaky voice in fluent speech. 
The model is composed of an encoder and a classifier trained together. The encoder takes the raw waveform and learns a representation using a convolutional neural network. The classifier is implemented as a multi-headed fully-connected network trained to detect creaky voice, voicing, and pitch, where the last two are used to refine creak prediction. The model is trained and tested on speech of American English speakers, annotated for creak by trained phoneticians. We evaluated the performance of our system using two encoders: one is tailored for the task, and the other is based on a state-of-the-art unsupervised representation. Results suggest our best-performing system has improved recall and F1 scores compared to previous methods on unseen data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.17019v2-abstract-full').style.display = 'none'; document.getElementById('2203.17019v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Interspeech 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.11572">arXiv:2006.11572</a> <span> [<a href="https://arxiv.org/pdf/2006.11572">pdf</a>, <a href="https://arxiv.org/format/2006.11572">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SIGMORPHON 2020 Shared Task 0: Typologically Diverse Morphological Inflection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Vylomova%2C+E">Ekaterina Vylomova</a>, <a href="/search/?searchtype=author&query=White%2C+J">Jennifer White</a>, <a href="/search/?searchtype=author&query=Salesky%2C+E">Elizabeth Salesky</a>, <a href="/search/?searchtype=author&query=Mielke%2C+S+J">Sabrina J. 
Mielke</a>, <a href="/search/?searchtype=author&query=Wu%2C+S">Shijie Wu</a>, <a href="/search/?searchtype=author&query=Ponti%2C+E">Edoardo Ponti</a>, <a href="/search/?searchtype=author&query=Maudslay%2C+R+H">Rowan Hall Maudslay</a>, <a href="/search/?searchtype=author&query=Zmigrod%2C+R">Ran Zmigrod</a>, <a href="/search/?searchtype=author&query=Valvoda%2C+J">Josef Valvoda</a>, <a href="/search/?searchtype=author&query=Toldova%2C+S">Svetlana Toldova</a>, <a href="/search/?searchtype=author&query=Tyers%2C+F">Francis Tyers</a>, <a href="/search/?searchtype=author&query=Klyachko%2C+E">Elena Klyachko</a>, <a href="/search/?searchtype=author&query=Yegorov%2C+I">Ilya Yegorov</a>, <a href="/search/?searchtype=author&query=Krizhanovsky%2C+N">Natalia Krizhanovsky</a>, <a href="/search/?searchtype=author&query=Czarnowska%2C+P">Paula Czarnowska</a>, <a href="/search/?searchtype=author&query=Nikkarinen%2C+I">Irene Nikkarinen</a>, <a href="/search/?searchtype=author&query=Krizhanovsky%2C+A">Andrew Krizhanovsky</a>, <a href="/search/?searchtype=author&query=Pimentel%2C+T">Tiago Pimentel</a>, <a href="/search/?searchtype=author&query=Hennigen%2C+L+T">Lucas Torroba Hennigen</a>, <a href="/search/?searchtype=author&query=Kirov%2C+C">Christo Kirov</a>, <a href="/search/?searchtype=author&query=Nicolai%2C+G">Garrett Nicolai</a>, <a href="/search/?searchtype=author&query=Williams%2C+A">Adina Williams</a>, <a href="/search/?searchtype=author&query=Anastasopoulos%2C+A">Antonios Anastasopoulos</a>, <a href="/search/?searchtype=author&query=Cruz%2C+H">Hilaria Cruz</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a> , et al. 
(3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.11572v2-abstract-short" style="display: inline;"> A broad goal in natural language processing (NLP) is to develop a system that has the capacity to process any natural language. Most systems, however, are developed using data from just one language such as English. The SIGMORPHON 2020 shared task on morphological reinflection aims to investigate systems' ability to generalize across typologically distinct languages, many of which are low resource… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.11572v2-abstract-full').style.display = 'inline'; document.getElementById('2006.11572v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.11572v2-abstract-full" style="display: none;"> A broad goal in natural language processing (NLP) is to develop a system that has the capacity to process any natural language. Most systems, however, are developed using data from just one language such as English. The SIGMORPHON 2020 shared task on morphological reinflection aims to investigate systems' ability to generalize across typologically distinct languages, many of which are low resource. Systems were developed using data from 45 languages and just 5 language families, fine-tuned with data from an additional 45 languages and 10 language families (13 in total), and evaluated on all 90 languages. A total of 22 systems (19 neural) from 10 teams were submitted to the task. All four winning systems were neural (two monolingual transformers and two massively multilingual RNN-based models with gated attention). Most teams demonstrate utility of data hallucination and augmentation, ensembles, and multilingual training for low-resource languages. 
Non-neural learners and manually designed grammars showed competitive and even superior performance on some languages (such as Ingrian, Tajik, Tagalog, Zarma, Lingala), especially with very limited data. Some language families (Afro-Asiatic, Niger-Congo, Turkic) were relatively easy for most systems and achieved over 90% mean accuracy while others were more challenging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.11572v2-abstract-full').style.display = 'none'; document.getElementById('2006.11572v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 pages, SIGMORPHON</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.13962">arXiv:2005.13962</a> <span> [<a href="https://arxiv.org/pdf/2005.13962">pdf</a>, <a href="https://arxiv.org/format/2005.13962">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Corpus for Large-Scale Phonetic Typology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Salesky%2C+E">Elizabeth Salesky</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Pimentel%2C+T">Tiago Pimentel</a>, <a href="/search/?searchtype=author&query=Wiesner%2C+M">Matthew 
Wiesner</a>, <a href="/search/?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a>, <a href="/search/?searchtype=author&query=Black%2C+A+W">Alan W Black</a>, <a href="/search/?searchtype=author&query=Eisner%2C+J">Jason Eisner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.13962v1-abstract-short" style="display: inline;"> A major hurdle in data-driven research on typology is having sufficient data in many languages to draw meaningful conclusions. We present VoxClamantis v1.0, the first large-scale corpus for phonetic typology, with aligned segments and estimated phoneme-level labels in 690 readings spanning 635 languages, along with acoustic-phonetic measures of vowels and sibilants. Access to such data can greatly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.13962v1-abstract-full').style.display = 'inline'; document.getElementById('2005.13962v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.13962v1-abstract-full" style="display: none;"> A major hurdle in data-driven research on typology is having sufficient data in many languages to draw meaningful conclusions. We present VoxClamantis v1.0, the first large-scale corpus for phonetic typology, with aligned segments and estimated phoneme-level labels in 690 readings spanning 635 languages, along with acoustic-phonetic measures of vowels and sibilants. Access to such data can greatly facilitate investigation of phonetic typology at a large scale and across many languages. However, it is non-trivial and computationally intensive to obtain such alignments for hundreds of languages, many of which have few to no resources presently available. 
We describe the methodology to create our corpus, discuss caveats with current methods and their impact on the utility of this data, and illustrate possible research directions through a series of case studies on the 48 highest-quality readings. Our corpus and scripts are publicly available for non-commercial use at https://voxclamantisproject.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.13962v1-abstract-full').style.display = 'none'; document.getElementById('2005.13962v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.00626">arXiv:2005.00626</a> <span> [<a href="https://arxiv.org/pdf/2005.00626">pdf</a>, <a href="https://arxiv.org/format/2005.00626">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Predicting Declension Class from Form and Meaning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Williams%2C+A">Adina Williams</a>, <a href="/search/?searchtype=author&query=Pimentel%2C+T">Tiago Pimentel</a>, <a href="/search/?searchtype=author&query=McCarthy%2C+A+D">Arya D. 
McCarthy</a>, <a href="/search/?searchtype=author&query=Blix%2C+H">Hagen Blix</a>, <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a>, <a href="/search/?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.00626v2-abstract-short" style="display: inline;"> The noun lexica of many natural languages are divided into several declension classes with characteristic morphological properties. Class membership is far from deterministic, but the phonological form of a noun and/or its meaning can often provide imperfect clues. Here, we investigate the strength of those clues. More specifically, we operationalize this by measuring how much information, in bits… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00626v2-abstract-full').style.display = 'inline'; document.getElementById('2005.00626v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.00626v2-abstract-full" style="display: none;"> The noun lexica of many natural languages are divided into several declension classes with characteristic morphological properties. Class membership is far from deterministic, but the phonological form of a noun and/or its meaning can often provide imperfect clues. Here, we investigate the strength of those clues. More specifically, we operationalize this by measuring how much information, in bits, we can glean about declension class from knowing the form and/or meaning of nouns. We know that form and meaning are often also indicative of grammatical gender---which, as we quantitatively verify, can itself share information with declension class---so we also control for gender. 
We find for two Indo-European languages (Czech and German) that form and meaning respectively share significant amounts of information with class (and contribute additional information above and beyond gender). The three-way interaction between class, form, and meaning (given gender) is also significant. Our study is important for two reasons: First, we introduce a new method that provides additional quantitative support for a classic linguistic finding that form and meaning are relevant for the classification of nouns into declensions. Secondly, we show not only that individual declensions classes vary in the strength of their clues within a language, but also that these variations themselves vary across languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00626v2-abstract-full').style.display = 'none'; document.getElementById('2005.00626v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 2 figures, this is the camera-ready version accepted at the 2020 Annual Conference of the Association for Computational Linguistics (ACL 2020)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.05553">arXiv:1811.05553</a> <span> [<a href="https://arxiv.org/pdf/1811.05553">pdf</a>, <a href="https://arxiv.org/format/1811.05553">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Corpus Phonetics Tutorial </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chodroff%2C+E">Eleanor Chodroff</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.05553v1-abstract-short" style="display: inline;"> Corpus phonetics has become an increasingly popular method of research in linguistic analysis. With advances in speech technology and computational power, large scale processing of speech data has become a viable technique. This tutorial introduces the speech scientist and engineer to various automatic speech processing tools. 
These include acoustic model creation and forced alignment using the Ka… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.05553v1-abstract-full').style.display = 'inline'; document.getElementById('1811.05553v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.05553v1-abstract-full" style="display: none;"> Corpus phonetics has become an increasingly popular method of research in linguistic analysis. With advances in speech technology and computational power, large scale processing of speech data has become a viable technique. This tutorial introduces the speech scientist and engineer to various automatic speech processing tools. These include acoustic model creation and forced alignment using the Kaldi Automatic Speech Recognition Toolkit (Povey et al., 2011), forced alignment using FAVE-align (Rosenfelder et al., 2014), the Montreal Forced Aligner (McAuliffe et al., 2017), and the Penn Phonetics Lab Forced Aligner (Yuan & Liberman, 2008), as well as stop consonant burst alignment using AutoVOT (Keshet et al., 2014). The tutorial provides a general overview of each program, step-by-step instructions for running the program, as well as several tips and tricks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.05553v1-abstract-full').style.display = 'none'; document.getElementById('1811.05553v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. 
</p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a 
href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 
47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository