CINXE.COM

Sentence boundary disambiguation - Wikipedia

<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Sentence boundary disambiguation - Wikipedia</title> <script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy", "wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"aa85b427-dbd1-4a5a-9c1e-345fa8424bb6","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Sentence_boundary_disambiguation","wgTitle":"Sentence boundary disambiguation","wgCurRevisionId":1245585498,"wgRevisionId":1245585498,"wgArticleId":5950062,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Wikipedia articles needing clarification from February 2015","Tasks of natural language processing"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Sentence_boundary_disambiguation","wgRelevantArticleId":5950062,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive": false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":6000,"wgRelatedArticlesCompat":[],"wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q7451191","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};RLSTATE={ "ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","ext.pygments":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","ext.pygments.view","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging", "ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=ext.cite.styles%7Cext.pygments%2CwikimediaBadges%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&amp;only=styles&amp;skin=vector-2022"> <script async="" src="/w/load.php?lang=en&amp;modules=startup&amp;only=scripts&amp;raw=1&amp;skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=site.styles&amp;only=styles&amp;skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Sentence boundary disambiguation - Wikipedia"> <meta property="og:type" content="website"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Sentence_boundary_disambiguation"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Sentence_boundary_disambiguation"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Sentence_boundary_disambiguation rootpage-Sentence_boundary_disambiguation skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page&#039;s font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=C13_en.wikipedia.org&amp;uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&amp;returnto=Sentence+boundary+disambiguation" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&amp;returnto=Sentence+boundary+disambiguation" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=C13_en.wikipedia.org&amp;uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&amp;returnto=Sentence+boundary+disambiguation" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Sentence+boundary+disambiguation" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Strategies" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Strategies"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Strategies</span> </div> </a> <ul id="toc-Strategies-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Software" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Software"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Software</span> </div> </a> <ul id="toc-Software-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Sentence boundary disambiguation</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 3 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-3" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">3 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D8%A7%D8%A8%D9%87%D8%A7%D9%85%E2%80%8C%D8%B2%D8%AF%D8%A7%DB%8C%DB%8C_%D9%85%D8%B1%D8%B2_%D8%AC%D9%85%D9%84%D9%87" title="ابهام‌زدایی مرز جمله – Persian" lang="fa" hreflang="fa" data-title="ابهام‌زدایی مرز جمله" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/%D0%A0%D0%BE%D0%B7%D1%80%D1%96%D0%B7%D0%BD%D0%B5%D0%BD%D0%BD%D1%8F_%D0%BC%D0%B5%D0%B6_%D1%80%D0%B5%D1%87%D0%B5%D0%BD%D0%BD%D1%8F" title="Розрізнення меж речення – Ukrainian" lang="uk" hreflang="uk" data-title="Розрізнення меж речення" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E5%8F%A5%E5%AD%90%E5%88%87%E5%89%B2" title="句子切割 – Cantonese" lang="yue" hreflang="yue" data-title="句子切割" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q7451191#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Sentence_boundary_disambiguation" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Sentence_boundary_disambiguation" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Sentence_boundary_disambiguation"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Sentence_boundary_disambiguation"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Sentence_boundary_disambiguation" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Sentence_boundary_disambiguation" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;oldid=1245585498" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&amp;page=Sentence_boundary_disambiguation&amp;id=1245585498&amp;wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSentence_boundary_disambiguation"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSentence_boundary_disambiguation"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=Sentence_boundary_disambiguation&amp;action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q7451191" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><p><b>Sentence boundary disambiguation</b> (<b>SBD</b>), also known as <b>sentence breaking</b>, <b>sentence boundary detection</b>, and <b>sentence segmentation</b>, is the problem in <a href="/wiki/Natural_language_processing" title="Natural language processing">natural language processing</a> of deciding where <a href="/wiki/Sentence_(linguistics)" title="Sentence (linguistics)">sentences</a> begin and end. Natural language processing tools often require their input to be divided into sentences; however, sentence boundary identification can be challenging due to the potential ambiguity of <a href="/wiki/Punctuation_mark" class="mw-redirect" title="Punctuation mark">punctuation marks</a>. In <a href="/wiki/Standard_written_English" class="mw-redirect" title="Standard written English">written English</a>, a <a href="/wiki/Full_stop" title="Full stop">period</a> may indicate the end of a sentence, or may denote an <a href="/wiki/Abbreviation" title="Abbreviation">abbreviation</a>, a <a href="/wiki/Decimal_point" class="mw-redirect" title="Decimal point">decimal point</a>, an <a href="/wiki/Ellipsis" title="Ellipsis">ellipsis</a>, or an email address, among other possibilities. About 47% of the periods in <i><a href="/wiki/The_Wall_Street_Journal" title="The Wall Street Journal">The Wall Street Journal</a></i> <a href="/wiki/Text_corpus" title="Text corpus">corpus</a> denote abbreviations.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> <a href="/wiki/Question_mark" title="Question mark">Question marks</a> and <a href="/wiki/Exclamation_marks" class="mw-redirect" title="Exclamation marks">exclamation marks</a> can be similarly ambiguous due to use in <a href="/wiki/Emoticon" title="Emoticon">emoticons</a>, <a href="/wiki/Source_code" title="Source code">source code</a>, and <a href="/wiki/Slang" title="Slang">slang</a>. </p><p>Some languages including Japanese and Chinese have unambiguous sentence-ending markers. </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Strategies">Strategies</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit&amp;section=1" title="Edit section: Strategies"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The standard '<a href="/wiki/Vanilla_(computing)" class="mw-redirect" title="Vanilla (computing)">vanilla</a>' approach to locate the end of a sentence:<sup class="noprint Inline-Template" style="margin-left:0.1em; white-space:nowrap;">&#91;<i><a href="/wiki/Wikipedia:Please_clarify" title="Wikipedia:Please clarify"><span title="The text near this tag may need clarification or removal of jargon. (February 2015)">clarification needed</span></a></i>&#93;</sup> </p> <dl><dd>(a) If it is a period, it ends a sentence.</dd> <dd>(b) If the preceding token is in the hand-compiled <a href="/wiki/List_of_abbreviations" class="mw-redirect" title="List of abbreviations">list of abbreviations</a>, then it does not end a sentence.</dd> <dd>(c) If the next token is capitalized, then it ends a sentence.</dd></dl> <p>This strategy gets about 95% of sentences correct.<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup> Things such as shortened names, e.g. "<a href="/wiki/D._H._Lawrence" title="D. H. Lawrence">D. H. Lawrence</a>" (with <a href="/wiki/Space_(punctuation)" title="Space (punctuation)">whitespaces</a> between the individual words that form the full name), idiosyncratic orthographical spellings used for stylistic purposes (often referring to a single concept, e.g. an entertainment product title like "<a href="/wiki/.hack//SIGN" class="mw-redirect" title=".hack//SIGN">.hack//SIGN</a>") and usage of non-standard punctuation (or non-standard usage <i>of</i> punctuation) in a text often fall under the remaining 5%. </p><p>Another approach is to automatically learn a set of rules from a set of documents where the sentence breaks are pre-marked. Solutions have been based on a <a href="/wiki/Maximum_entropy_model" class="mw-redirect" title="Maximum entropy model">maximum entropy model</a>.<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span class="cite-bracket">&#91;</span>3<span class="cite-bracket">&#93;</span></a></sup> The SATZ<sup id="cite_ref-satz_4-0" class="reference"><a href="#cite_note-satz-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> architecture uses a neural network to disambiguate sentence boundaries and achieves 98.5% accuracy. </p> <div class="mw-heading mw-heading2"><h2 id="Software">Software</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit&amp;section=2" title="Edit section: Software"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <dl><dt>Examples of use of Perl compatible <a href="/wiki/Regex" class="mw-redirect" title="Regex">regular expressions</a> ("<a href="/wiki/Perl_Compatible_Regular_Expressions" title="Perl Compatible Regular Expressions">PCRE</a>")</dt> <dd> <ul><li><code class="mw-highlight mw-highlight-lang-ragel mw-content-ltr" dir="ltr"><span class="o">((?</span><span class="err">&lt;</span><span class="o">=</span><span class="s">[a-z0-9][.?!]</span><span class="o">)|(?</span><span class="err">&lt;</span><span class="o">=</span><span class="s">[a-z0-9][.?!]</span><span class="err">\&quot;</span><span class="o">))(</span><span class="err">\</span><span class="nv">s</span><span class="o">|</span><span class="err">\</span><span class="nv">r</span><span class="err">\</span><span class="nv">n</span><span class="o">)(?=</span><span class="err">\&quot;</span><span class="o">?</span><span class="s">[A-Z]</span><span class="o">)</span></code></li> <li><code class="mw-highlight mw-highlight-lang-php mw-content-ltr" dir="ltr"><span class="nv">$sentences</span> <span class="o">=</span> <span class="nb">preg_split</span><span class="p">(</span><span class="s2">&quot;/(?&lt;!\..)([\?\!\.]+)\s(?!.\.)/&quot;</span><span class="p">,</span> <span class="nv">$text</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="nx">PREG_SPLIT_DELIM_CAPTURE</span><span class="p">);</span></code> (for <a href="/wiki/PHP" title="PHP">PHP</a>)</li></ul></dd></dl> <dl><dt>Online use, libraries, and APIs</dt> <dd> <ul><li>sent_detector&#160;&#8211;&#32;Java<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">&#91;</span>5<span class="cite-bracket">&#93;</span></a></sup></li> <li>Lingua-EN-Sentence&#160;&#8211;&#32;perl<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup></li> <li>Sentence.pm&#160;&#8211;&#32;perl<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup></li> <li>SATZ&#160;&#8211;&#32;An Adaptive Sentence Segmentation System&#160;&#8211;&#32;by David D. Palmer&#160;&#8211;&#32;C<sup id="cite_ref-satz_4-1" class="reference"><a href="#cite_note-satz-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup></li></ul></dd></dl> <dl><dt>Toolkits that include sentence detection</dt> <dd> <ul><li><a href="/wiki/OpenNLP" class="mw-redirect" title="OpenNLP">Apache OpenNLP</a><sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">&#91;</span>8<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/w/index.php?title=Freeling_(software)&amp;action=edit&amp;redlink=1" class="new" title="Freeling (software) (page does not exist)">Freeling (software)</a><sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">&#91;</span>9<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Natural_Language_Toolkit" title="Natural Language Toolkit">Natural Language Toolkit</a><sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">&#91;</span>10<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/w/index.php?title=Stanford_NLP&amp;action=edit&amp;redlink=1" class="new" title="Stanford NLP (page does not exist)">Stanford NLP</a><sup id="cite_ref-11" class="reference"><a href="#cite_note-11"><span class="cite-bracket">&#91;</span>11<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/w/index.php?title=GExp&amp;action=edit&amp;redlink=1" class="new" title="GExp (page does not exist)">GExp</a><sup id="cite_ref-12" class="reference"><a href="#cite_note-12"><span class="cite-bracket">&#91;</span>12<span class="cite-bracket">&#93;</span></a></sup></li> <li>CogComp-NLP<sup id="cite_ref-13" class="reference"><a href="#cite_note-13"><span class="cite-bracket">&#91;</span>13<span class="cite-bracket">&#93;</span></a></sup></li></ul></dd></dl> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit&amp;section=3" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Multiword_expression" title="Multiword expression">Multiword expression</a></li> <li><a href="/wiki/Punctuation" title="Punctuation">Punctuation</a></li> <li><a href="/wiki/Sentence_extraction" title="Sentence extraction">Sentence extraction</a></li> <li><a href="/wiki/Sentence_spacing" title="Sentence spacing">Sentence spacing</a></li> <li><a href="/wiki/Speech_segmentation" title="Speech segmentation">Speech segmentation</a></li> <li><a href="/wiki/Syllabification" title="Syllabification">Syllabification</a></li> <li><a href="/wiki/Text_segmentation" title="Text segmentation">Text segmentation</a></li> <li><a href="/wiki/Translation_memory" title="Translation memory">Translation memory</a></li> <li><a href="/wiki/Word_divider" title="Word divider">Word divider</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit&amp;section=4" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite id="CITEREFE._StamatatosN._FakotakisG._Kokkinakis" class="citation conference cs1">E. Stamatatos; N. Fakotakis &amp; G. Kokkinakis. <a rel="nofollow" class="external text" href="https://www.researchgate.net/publication/2605947">"Automatic extraction of rules for sentence boundary disambiguation"</a>. <i>Proceedings of the Workshop on Machine Learning in Human Language Technology</i>. University of Patras. pp.&#160;88–92.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Automatic+extraction+of+rules+for+sentence+boundary+disambiguation&amp;rft.btitle=Proceedings+of+the+Workshop+on+Machine+Learning+in+Human+Language+Technology&amp;rft.pages=88-92&amp;rft.pub=University+of+Patras&amp;rft.au=E.+Stamatatos&amp;rft.au=N.+Fakotakis&amp;rft.au=G.+Kokkinakis&amp;rft_id=https%3A%2F%2Fwww.researchgate.net%2Fpublication%2F2605947&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFO&#39;Neil" class="citation web cs1">O'Neil, John. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20090221004022/http://www.attivio.com/attivio/blog/263-doing-things-with-words-part-two-sentence-boundary-detection.html">"Doing Things with Words, Part Two: Sentence Boundary Detection"</a>. Archived from <a rel="nofollow" class="external text" href="http://www.attivio.com/attivio/blog/263-doing-things-with-words-part-two-sentence-boundary-detection.html">the original</a> on 2009-02-21<span class="reference-accessdate">. Retrieved <span class="nowrap">2009-01-03</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Doing+Things+with+Words%2C+Part+Two%3A+Sentence+Boundary+Detection&amp;rft.aulast=O%27Neil&amp;rft.aufirst=John&amp;rft_id=http%3A%2F%2Fwww.attivio.com%2Fattivio%2Fblog%2F263-doing-things-with-words-part-two-sentence-boundary-detection.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-3">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFReynarRatnaparkhi" class="citation web cs1">Reynar, JC; Ratnaparkhi, A. <a rel="nofollow" class="external text" href="http://www.aclweb.org/anthology/A/A97/A97-1004.pdf">"A Maximum Entropy Approach to Identifying Sentence Boundaries"</a> <span class="cs1-format">(PDF)</span><span class="reference-accessdate">. Retrieved <span class="nowrap">2009-01-03</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=A+Maximum+Entropy+Approach+to+Identifying+Sentence+Boundaries&amp;rft.aulast=Reynar&amp;rft.aufirst=JC&amp;rft.au=Ratnaparkhi%2C+A&amp;rft_id=http%3A%2F%2Fwww.aclweb.org%2Fanthology%2FA%2FA97%2FA97-1004.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-satz-4"><span class="mw-cite-backlink">^ <a href="#cite_ref-satz_4-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-satz_4-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20070922132340/http://elib.cs.berkeley.edu/src/satz/">"SATZ: An Adaptive Sentence Boundary Detector"</a>. Archived from <a rel="nofollow" class="external text" href="http://elib.cs.berkeley.edu/src/satz/">the original</a> on 2007-09-22.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=SATZ%3A+An+Adaptive+Sentence+Boundary+Detector&amp;rft_id=http%3A%2F%2Felib.cs.berkeley.edu%2Fsrc%2Fsatz%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20071112103940/http://text0.mib.man.ac.uk:8080/scottpiao/sent_detector">"SentParBreaker Web page"</a>. Archived from <a rel="nofollow" class="external text" href="http://text0.mib.man.ac.uk:8080/scottpiao/sent_detector">the original</a> on 2007-11-12.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=SentParBreaker+Web+page&amp;rft_id=http%3A%2F%2Ftext0.mib.man.ac.uk%3A8080%2Fscottpiao%2Fsent_detector&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://metacpan.org/release/SHLOMOY/Lingua-EN-Sentence-0.25">"Lingua-EN-Sentence-0.25 - Module for splitting text into sentences. - metacpan.org"</a>. <i>metacpan.org</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=metacpan.org&amp;rft.atitle=Lingua-EN-Sentence-0.25+-+Module+for+splitting+text+into+sentences.+-+metacpan.org&amp;rft_id=https%3A%2F%2Fmetacpan.org%2Frelease%2FSHLOMOY%2FLingua-EN-Sentence-0.25&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://metacpan.org/release/TGROSE/HTML-Summary-0.017/view/lib/Text/Sentence.pm">"Text::Sentence - module for splitting text into sentences - metacpan.org"</a>. <i>metacpan.org</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=metacpan.org&amp;rft.atitle=Text%3A%3ASentence+-+module+for+splitting+text+into+sentences+-+metacpan.org&amp;rft_id=https%3A%2F%2Fmetacpan.org%2Frelease%2FTGROSE%2FHTML-Summary-0.017%2Fview%2Flib%2FText%2FSentence.pm&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://opennlp.apache.org/">"Apache OpenNLP"</a>. <i>opennlp.apache.org</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=opennlp.apache.org&amp;rft.atitle=Apache+OpenNLP&amp;rft_id=https%3A%2F%2Fopennlp.apache.org%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nlp.lsi.upc.edu/freeling/">"Welcome | FreeLing Home Page"</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Welcome+%7C+FreeLing+Home+Page&amp;rft_id=https%3A%2F%2Fnlp.lsi.upc.edu%2Ffreeling%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.nltk.org/">"NLTK&#160;:: Natural Language Toolkit"</a>. <i>www.nltk.org</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=www.nltk.org&amp;rft.atitle=NLTK+%3A%3A+Natural+Language+Toolkit&amp;rft_id=https%3A%2F%2Fwww.nltk.org%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nlp.stanford.edu/software/index.shtml">"Software - The Stanford Natural Language Processing Group"</a>. <i>nlp.stanford.edu</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nlp.stanford.edu&amp;rft.atitle=Software+-+The+Stanford+Natural+Language+Processing+Group&amp;rft_id=https%3A%2F%2Fnlp.stanford.edu%2Fsoftware%2Findex.shtml&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-12">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://code.google.com/archive/p/graph-expression/wikis/SentenceSplitting.wiki">"Google Code Archive - Long-term storage for Google Code Project Hosting"</a>. <i>code.google.com</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=code.google.com&amp;rft.atitle=Google+Code+Archive+-+Long-term+storage+for+Google+Code+Project+Hosting.&amp;rft_id=https%3A%2F%2Fcode.google.com%2Farchive%2Fp%2Fgraph-expression%2Fwikis%2FSentenceSplitting.wiki&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> <li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://github.com/CogComp/cogcomp-nlp">"CogCompNLP"</a>. January 2, 2024 &#8211; via GitHub.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=CogCompNLP&amp;rft.date=2024-01-02&amp;rft_id=https%3A%2F%2Fgithub.com%2FCogComp%2Fcogcomp-nlp&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ASentence+boundary+disambiguation" class="Z3988"></span></span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Sentence_boundary_disambiguation&amp;action=edit&amp;section=5" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a rel="nofollow" class="external text" href="https://spacy.io/universe/project/python-sentence-boundary-disambiguation">pySBD - python Sentence Boundary Disambiguation</a></li></ul> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Natural_language_processing" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Natural_language_processing" title="Template:Natural language processing"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Natural_language_processing" title="Template talk:Natural language processing"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Natural_language_processing" title="Special:EditPage/Template:Natural language processing"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Natural_language_processing" style="font-size:114%;margin:0 4em"><a href="/wiki/Natural_language_processing" title="Natural language processing">Natural language processing</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">General terms</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AI-complete" title="AI-complete">AI-complete</a></li> <li><a href="/wiki/Bag-of-words_model" title="Bag-of-words model">Bag-of-words</a></li> <li><a href="/wiki/N-gram" title="N-gram">n-gram</a> <ul><li><a href="/wiki/Bigram" title="Bigram">Bigram</a></li> <li><a href="/wiki/Trigram" title="Trigram">Trigram</a></li></ul></li> <li><a href="/wiki/Computational_linguistics" title="Computational linguistics">Computational linguistics</a></li> <li><a href="/wiki/Natural_language_understanding" title="Natural language understanding">Natural language understanding</a></li> <li><a href="/wiki/Stop_word" title="Stop word">Stop words</a></li> <li><a href="/wiki/Text_processing" title="Text processing">Text processing</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text_mining" title="Text mining">Text analysis</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Argument_mining" title="Argument mining">Argument mining</a></li> <li><a href="/wiki/Collocation_extraction" title="Collocation extraction">Collocation extraction</a></li> <li><a href="/wiki/Concept_mining" title="Concept mining">Concept mining</a></li> <li><a href="/wiki/Coreference#Coreference_resolution" title="Coreference">Coreference resolution</a></li> <li><a href="/wiki/Deep_linguistic_processing" title="Deep linguistic processing">Deep linguistic processing</a></li> <li><a href="/wiki/Distant_reading" title="Distant reading">Distant reading</a></li> <li><a href="/wiki/Information_extraction" title="Information extraction">Information extraction</a></li> <li><a href="/wiki/Named-entity_recognition" title="Named-entity recognition">Named-entity recognition</a></li> <li><a href="/wiki/Ontology_learning" title="Ontology learning">Ontology learning</a></li> <li><a href="/wiki/Parsing" title="Parsing">Parsing</a> <ul><li><a href="/wiki/Semantic_parsing" title="Semantic parsing">Semantic parsing</a></li> <li><a href="/wiki/Syntactic_parsing_(computational_linguistics)" title="Syntactic parsing (computational linguistics)">Syntactic parsing</a></li></ul></li> <li><a href="/wiki/Part-of-speech_tagging" title="Part-of-speech tagging">Part-of-speech tagging</a></li> <li><a href="/wiki/Semantic_analysis_(machine_learning)" title="Semantic analysis (machine learning)">Semantic analysis</a></li> <li><a href="/wiki/Semantic_role_labeling" title="Semantic role labeling">Semantic role labeling</a></li> <li><a href="/wiki/Semantic_decomposition_(natural_language_processing)" title="Semantic decomposition (natural language processing)">Semantic decomposition</a></li> <li><a href="/wiki/Semantic_similarity" title="Semantic similarity">Semantic similarity</a></li> <li><a href="/wiki/Sentiment_analysis" title="Sentiment analysis">Sentiment analysis</a></li></ul> <ul><li><a href="/wiki/Terminology_extraction" title="Terminology extraction">Terminology extraction</a></li> <li><a href="/wiki/Text_mining" title="Text mining">Text mining</a></li> <li><a href="/wiki/Textual_entailment" title="Textual entailment">Textual entailment</a></li> <li><a href="/wiki/Truecasing" title="Truecasing">Truecasing</a></li> <li><a href="/wiki/Word-sense_disambiguation" title="Word-sense disambiguation">Word-sense disambiguation</a></li> <li><a href="/wiki/Word-sense_induction" title="Word-sense induction">Word-sense induction</a></li></ul> </div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th id="Text_segmentation" scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text_segmentation" title="Text segmentation">Text segmentation</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Compound-term_processing" title="Compound-term processing">Compound-term processing</a></li> <li><a href="/wiki/Lemmatisation" class="mw-redirect" title="Lemmatisation">Lemmatisation</a></li> <li><a href="/wiki/Lexical_analysis" title="Lexical analysis">Lexical analysis</a></li> <li><a href="/wiki/Shallow_parsing" title="Shallow parsing">Text chunking</a></li> <li><a href="/wiki/Stemming" title="Stemming">Stemming</a></li> <li><a class="mw-selflink selflink">Sentence segmentation</a></li> <li><a href="/wiki/Word#Word_boundaries" title="Word">Word segmentation</a></li></ul> </div></td></tr></tbody></table><div> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Automatic_summarization" title="Automatic summarization">Automatic summarization</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Multi-document_summarization" title="Multi-document summarization">Multi-document summarization</a></li> <li><a href="/wiki/Sentence_extraction" title="Sentence extraction">Sentence extraction</a></li> <li><a href="/wiki/Text_simplification" title="Text simplification">Text simplification</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Machine_translation" title="Machine translation">Machine translation</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Computer-assisted_translation" title="Computer-assisted translation">Computer-assisted</a></li> <li><a href="/wiki/Example-based_machine_translation" title="Example-based machine translation">Example-based</a></li> <li><a href="/wiki/Rule-based_machine_translation" title="Rule-based machine translation">Rule-based</a></li> <li><a href="/wiki/Statistical_machine_translation" title="Statistical machine translation">Statistical</a></li> <li><a href="/wiki/Transfer-based_machine_translation" title="Transfer-based machine translation">Transfer-based</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">Neural</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Distributional_semantics" title="Distributional semantics">Distributional semantics</a> models</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/Document-term_matrix" title="Document-term matrix">Document-term matrix</a></li> <li><a href="/wiki/Explicit_semantic_analysis" title="Explicit semantic analysis">Explicit semantic analysis</a></li> <li><a href="/wiki/FastText" title="FastText">fastText</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> (<a href="/wiki/Large_language_model" title="Large language model">large</a>)</li> <li><a href="/wiki/Latent_semantic_analysis" title="Latent semantic analysis">Latent semantic analysis</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li> <li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Language_resource" title="Language resource">Language resources</a>,<br />datasets and corpora</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Types and<br />standards</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Corpus_linguistics" title="Corpus linguistics">Corpus linguistics</a></li> <li><a href="/wiki/Lexical_resource" title="Lexical resource">Lexical resource</a></li> <li><a href="/wiki/Linguistic_Linked_Open_Data" title="Linguistic Linked Open Data">Linguistic Linked Open Data</a></li> <li><a href="/wiki/Machine-readable_dictionary" title="Machine-readable dictionary">Machine-readable dictionary</a></li> <li><a href="/wiki/Parallel_text" title="Parallel text">Parallel text</a></li> <li><a href="/wiki/PropBank" title="PropBank">PropBank</a></li> <li><a href="/wiki/Semantic_network" title="Semantic network">Semantic network</a></li> <li><a href="/wiki/Simple_Knowledge_Organization_System" title="Simple Knowledge Organization System">Simple Knowledge Organization System</a></li> <li><a href="/wiki/Speech_corpus" title="Speech corpus">Speech corpus</a></li> <li><a href="/wiki/Text_corpus" title="Text corpus">Text corpus</a></li> <li><a href="/wiki/Thesaurus_(information_retrieval)" title="Thesaurus (information retrieval)">Thesaurus (information retrieval)</a></li> <li><a href="/wiki/Treebank" title="Treebank">Treebank</a></li> <li><a href="/wiki/Universal_Dependencies" title="Universal Dependencies">Universal Dependencies</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Data</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/BabelNet" title="BabelNet">BabelNet</a></li> <li><a href="/wiki/Bank_of_English" title="Bank of English">Bank of English</a></li> <li><a href="/wiki/DBpedia" title="DBpedia">DBpedia</a></li> <li><a href="/wiki/FrameNet" title="FrameNet">FrameNet</a></li> <li><a href="/wiki/Google_Ngram_Viewer" class="mw-redirect" title="Google Ngram Viewer">Google Ngram Viewer</a></li> <li><a href="/wiki/UBY" title="UBY">UBY</a></li> <li><a href="/wiki/WordNet" title="WordNet">WordNet</a></li> <li><a href="/wiki/Wikidata" title="Wikidata">Wikidata</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Automatic_identification_and_data_capture" title="Automatic identification and data capture">Automatic identification<br />and data capture</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a></li> <li><a href="/wiki/Speech_segmentation" title="Speech segmentation">Speech segmentation</a></li> <li><a href="/wiki/Speech_synthesis" title="Speech synthesis">Speech synthesis</a></li> <li><a href="/wiki/Natural_language_generation" title="Natural language generation">Natural language generation</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">Optical character recognition</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Topic_model" title="Topic model">Topic model</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Document_classification" title="Document classification">Document classification</a></li> <li><a href="/wiki/Latent_Dirichlet_allocation" title="Latent Dirichlet allocation">Latent Dirichlet allocation</a></li> <li><a href="/wiki/Pachinko_allocation" title="Pachinko allocation">Pachinko allocation</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Computer-assisted_reviewing" title="Computer-assisted reviewing">Computer-assisted<br />reviewing</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Automated_essay_scoring" title="Automated essay scoring">Automated essay scoring</a></li> <li><a href="/wiki/Concordancer" title="Concordancer">Concordancer</a></li> <li><a href="/wiki/Grammar_checker" title="Grammar checker">Grammar checker</a></li> <li><a href="/wiki/Predictive_text" title="Predictive text">Predictive text</a></li> <li><a href="/wiki/Pronunciation_assessment" title="Pronunciation assessment">Pronunciation assessment</a></li> <li><a href="/wiki/Spell_checker" title="Spell checker">Spell checker</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Natural-language_user_interface" title="Natural-language user interface">Natural language<br />user interface</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Chatbot" title="Chatbot">Chatbot</a></li> <li><a href="/wiki/Interactive_fiction" title="Interactive fiction">Interactive fiction</a> (c.f. <a href="/wiki/Syntax_guessing" class="mw-redirect" title="Syntax guessing">Syntax guessing</a>)</li> <li><a href="/wiki/Question_answering" title="Question answering">Question answering</a></li> <li><a href="/wiki/Virtual_assistant" title="Virtual assistant">Virtual assistant</a></li> <li><a href="/wiki/Voice_user_interface" title="Voice user interface">Voice user interface</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Related</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Formal_semantics_(natural_language)" title="Formal semantics (natural language)">Formal semantics</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li> <li><a href="/wiki/Natural_Language_Toolkit" title="Natural Language Toolkit">Natural Language Toolkit</a></li> <li><a href="/wiki/SpaCy" title="SpaCy">spaCy</a></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.eqiad.canary‐7584787d95‐f2q89 Cached time: 20241107215031 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.317 seconds Real time usage: 0.537 seconds Preprocessor visited node count: 901/1000000 Post‐expand include size: 44469/2097152 bytes Template argument size: 507/2097152 bytes Highest expansion depth: 12/100 Expensive parser function count: 4/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 52554/5000000 bytes Lua time usage: 0.192/10.000 seconds Lua memory usage: 4444854/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 463.583 1 -total 33.86% 156.986 1 Template:Reflist 20.87% 96.762 1 Template:Natural_language_processing 20.55% 95.286 3 Template:Navbox 18.35% 85.068 1 Template:Cite_conference 13.63% 63.196 1 Template:Clarify 9.72% 45.058 12 Template:Cite_web 9.34% 43.310 1 Template:Fix-span 6.55% 30.360 2 Template:Category_handler 1.46% 6.775 1 Template:Delink --> <!-- Saved in parser cache with key enwiki:pcache:idhash:5950062-0!canonical and timestamp 20241107215031 and revision id 1245585498. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Sentence_boundary_disambiguation&amp;oldid=1245585498">https://en.wikipedia.org/w/index.php?title=Sentence_boundary_disambiguation&amp;oldid=1245585498</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Category</a>: <ul><li><a href="/wiki/Category:Tasks_of_natural_language_processing" title="Category:Tasks of natural language processing">Tasks of natural language processing</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden category: <ul><li><a href="/wiki/Category:Wikipedia_articles_needing_clarification_from_February_2015" title="Category:Wikipedia articles needing clarification from February 2015">Wikipedia articles needing clarification from February 2015</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 13 September 2024, at 21:26<span class="anonymous-show">&#160;(UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Sentence_boundary_disambiguation&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-f69cdc8f6-b8zqw","wgBackendResponseTime":168,"wgPageParseReport":{"limitreport":{"cputime":"0.317","walltime":"0.537","ppvisitednodes":{"value":901,"limit":1000000},"postexpandincludesize":{"value":44469,"limit":2097152},"templateargumentsize":{"value":507,"limit":2097152},"expansiondepth":{"value":12,"limit":100},"expensivefunctioncount":{"value":4,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":52554,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 463.583 1 -total"," 33.86% 156.986 1 Template:Reflist"," 20.87% 96.762 1 Template:Natural_language_processing"," 20.55% 95.286 3 Template:Navbox"," 18.35% 85.068 1 Template:Cite_conference"," 13.63% 63.196 1 Template:Clarify"," 9.72% 45.058 12 Template:Cite_web"," 9.34% 43.310 1 Template:Fix-span"," 6.55% 30.360 2 Template:Category_handler"," 1.46% 6.775 1 Template:Delink"]},"scribunto":{"limitreport-timeusage":{"value":"0.192","limit":"10.000"},"limitreport-memusage":{"value":4444854,"limit":52428800}},"cachereport":{"origin":"mw-web.eqiad.canary-7584787d95-f2q89","timestamp":"20241107215031","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Sentence boundary disambiguation","url":"https:\/\/en.wikipedia.org\/wiki\/Sentence_boundary_disambiguation","sameAs":"http:\/\/www.wikidata.org\/entity\/Q7451191","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q7451191","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2006-07-13T16:40:17Z","dateModified":"2024-09-13T21:26:32Z","headline":"problem in natural language processing of deciding where sentences begin and end"}</script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10