CINXE.COM
Our language database – IDEA
<!DOCTYPE html> <html lang="en-US"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta http-equiv="Content-Security-Policy" content="upgrade-insecure-requests"> <script type="text/javascript"> var pathInfo = { base: 'https://www.idea.org/blog/wp-content/themes/ideaorg/', css: 'css/', js: 'js/', swf: 'swf/', } </script> <title>Our language database – IDEA</title> <link rel='dns-prefetch' href='//s.w.org' /> <link rel="alternate" type="application/rss+xml" title="IDEA » Feed" href="https://www.idea.org/blog/feed/" /> <link rel="alternate" type="application/rss+xml" title="IDEA » Comments Feed" href="https://www.idea.org/blog/comments/feed/" /> <script type="text/javascript"> window._wpemojiSettings = {"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/11\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/11\/svg\/","svgExt":".svg","source":{"concatemoji":"\/\/www.idea.org\/blog\/wp-includes\/js\/wp-emoji-release.min.js?ver=4.9.26"}}; !function(e,a,t){var n,r,o,i=a.createElement("canvas"),p=i.getContext&&i.getContext("2d");function s(e,t){var a=String.fromCharCode;p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,e),0,0);e=i.toDataURL();return p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,t),0,0),e===i.toDataURL()}function c(e){var t=a.createElement("script");t.src=e,t.defer=t.type="text/javascript",a.getElementsByTagName("head")[0].appendChild(t)}for(o=Array("flag","emoji"),t.supports={everything:!0,everythingExceptFlag:!0},r=0;r<o.length;r++)t.supports[o[r]]=function(e){if(!p||!p.fillText)return!1;switch(p.textBaseline="top",p.font="600 32px Arial",e){case"flag":return 
s([55356,56826,55356,56819],[55356,56826,8203,55356,56819])?!1:!s([55356,57332,56128,56423,56128,56418,56128,56421,56128,56430,56128,56423,56128,56447],[55356,57332,8203,56128,56423,8203,56128,56418,8203,56128,56421,8203,56128,56430,8203,56128,56423,8203,56128,56447]);case"emoji":return!s([55358,56760,9792,65039],[55358,56760,8203,9792,65039])}return!1}(o[r]),t.supports.everything=t.supports.everything&&t.supports[o[r]],"flag"!==o[r]&&(t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&t.supports[o[r]]);t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&!t.supports.flag,t.DOMReady=!1,t.readyCallback=function(){t.DOMReady=!0},t.supports.everything||(n=function(){t.readyCallback()},a.addEventListener?(a.addEventListener("DOMContentLoaded",n,!1),e.addEventListener("load",n,!1)):(e.attachEvent("onload",n),a.attachEvent("onreadystatechange",function(){"complete"===a.readyState&&t.readyCallback()})),(n=t.source||{}).concatemoji?c(n.concatemoji):n.wpemoji&&n.twemoji&&(c(n.twemoji),c(n.wpemoji)))}(window,document,window._wpemojiSettings); </script> <style type="text/css"> img.wp-smiley, img.emoji { display: inline !important; border: none !important; box-shadow: none !important; height: 1em !important; width: 1em !important; margin: 0 .07em !important; vertical-align: -0.1em !important; background: none !important; padding: 0 !important; } </style> <link rel='stylesheet' id='yarppWidgetCss-css' href='//www.idea.org/blog/wp-content/plugins/yet-another-related-posts-plugin/style/widget.css' type='text/css' media='all' /> <link rel='stylesheet' id='contact-form-7-css' href='//www.idea.org/blog/wp-content/plugins/contact-form-7/includes/css/styles.css' type='text/css' media='all' /> <link rel='stylesheet' id='base-style-css' href='//www.idea.org/blog/wp-content/themes/ideaorg/style.css' type='text/css' media='all' /> <link rel='stylesheet' id='base-theme-css' href='//www.idea.org/blog/wp-content/themes/ideaorg/theme.css' type='text/css' media='all' 
/> <script type='text/javascript' src='//www.idea.org/blog/wp-includes/js/jquery/jquery.js'></script> <script type='text/javascript' src='//www.idea.org/blog/wp-includes/js/jquery/jquery-migrate.min.js'></script> <link rel='https://api.w.org/' href='https://www.idea.org/blog/wp-json/' /> <link rel="EditURI" type="application/rsd+xml" title="RSD" href="https://www.idea.org/blog/xmlrpc.php?rsd" /> <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="https://www.idea.org/blog/wp-includes/wlwmanifest.xml" /> <link rel="canonical" href="https://www.idea.org/blog/linguabase/" /> <link rel='shortlink' href='https://www.idea.org/blog/?p=4523' /> <link rel="alternate" type="application/json+oembed" href="https://www.idea.org/blog/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fwww.idea.org%2Fblog%2Flinguabase%2F" /> <link rel="alternate" type="text/xml+oembed" href="https://www.idea.org/blog/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fwww.idea.org%2Fblog%2Flinguabase%2F&format=xml" /> </head> <body class="inner page page-id-4523"> <div id="wrapper" class="js-ajax-holder"> <header id="header"> <div class="container"> <div class="logo"> <a href="https://www.idea.org/blog"><img src="https://www.idea.org/blog/wp-content/themes/ideaorg/images/logo.png" alt="IDEA"></a> </div> <div class="search-block"> <a href="#" class="opener"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search-2x.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/search-2x.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="search.png" alt="search"> </picture> </a> <div class="form-search"> <form action="https://www.idea.org/blog" method="get" class="search js-search-form"> <fieldset> <input 
id="search" name="s" type="search" placeholder="Type your search..." value=""> <button class="search-btn" type="submit" value=""> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search-2x.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/search-2x.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="search.png" alt="search"> </picture> </button> </fieldset> </form> <a class="opener2" href="#">Browse Topics</a> </div> </div> </div> <div class="topics-page"> <div class="container"> <div class="topics-holder"> <a href="#" class="closer"><span></span></a> <div class="widget widget_tag_cloud"> <h3>Other topics</h3> <div class="tagcloud js-ajax-links-holder"> <a href="https://www.idea.org/blog/tag/access/" class="tag-cloud-link tag-link-222 tag-link-position-1" style="font-size: 28.358208955224px;" aria-label="access (21 items)">access</a>, <a href="https://www.idea.org/blog/tag/android/" class="tag-cloud-link tag-link-112 tag-link-position-2" style="font-size: 21.791044776119px;" aria-label="Android (7 items)">Android</a>, <a href="https://www.idea.org/blog/tag/apple/" class="tag-cloud-link tag-link-110 tag-link-position-3" style="font-size: 26.268656716418px;" aria-label="Apple (15 items)">Apple</a>, <a href="https://www.idea.org/blog/tag/apps/" class="tag-cloud-link tag-link-169 tag-link-position-4" style="font-size: 25.970149253731px;" aria-label="apps (14 items)">apps</a>, <a href="https://www.idea.org/blog/tag/art/" class="tag-cloud-link tag-link-107 tag-link-position-5" style="font-size: 23.880597014925px;" aria-label="art (10 items)">art</a>, <a href="https://www.idea.org/blog/tag/audience/" class="tag-cloud-link tag-link-45 tag-link-position-6" 
style="font-size: 28.358208955224px;" aria-label="Audience (21 items)">Audience</a>, <a href="https://www.idea.org/blog/tag/blog/" class="tag-cloud-link tag-link-87 tag-link-position-7" style="font-size: 23.283582089552px;" aria-label="blog (9 items)">blog</a>, <a href="https://www.idea.org/blog/tag/classroom/" class="tag-cloud-link tag-link-188 tag-link-position-8" style="font-size: 21.791044776119px;" aria-label="classroom (7 items)">classroom</a>, <a href="https://www.idea.org/blog/tag/community/" class="tag-cloud-link tag-link-26 tag-link-position-9" style="font-size: 25.970149253731px;" aria-label="community (14 items)">community</a>, <a href="https://www.idea.org/blog/tag/content-management/" class="tag-cloud-link tag-link-39 tag-link-position-10" style="font-size: 25.522388059701px;" aria-label="content management (13 items)">content management</a>, <a href="https://www.idea.org/blog/tag/costs/" class="tag-cloud-link tag-link-70 tag-link-position-11" style="font-size: 21.044776119403px;" aria-label="costs (6 items)">costs</a>, <a href="https://www.idea.org/blog/tag/culture/" class="tag-cloud-link tag-link-217 tag-link-position-12" style="font-size: 22.537313432836px;" aria-label="culture (8 items)">culture</a>, <a href="https://www.idea.org/blog/tag/data/" class="tag-cloud-link tag-link-38 tag-link-position-13" style="font-size: 27.164179104478px;" aria-label="Data (17 items)">Data</a>, <a href="https://www.idea.org/blog/tag/dissemination/" class="tag-cloud-link tag-link-134 tag-link-position-14" style="font-size: 25.970149253731px;" aria-label="dissemination (14 items)">dissemination</a>, <a href="https://www.idea.org/blog/tag/e-learning/" class="tag-cloud-link tag-link-81 tag-link-position-15" style="font-size: 22.537313432836px;" aria-label="e-learning (8 items)">e-learning</a>, <a href="https://www.idea.org/blog/tag/education/" class="tag-cloud-link tag-link-192 tag-link-position-16" style="font-size: 26.716417910448px;" aria-label="education (16 
items)">education</a>, <a href="https://www.idea.org/blog/tag/evaluation/" class="tag-cloud-link tag-link-135 tag-link-position-17" style="font-size: 26.268656716418px;" aria-label="evaluation (15 items)">evaluation</a>, <a href="https://www.idea.org/blog/tag/exhibits/" class="tag-cloud-link tag-link-53 tag-link-position-18" style="font-size: 24.925373134328px;" aria-label="exhibits (12 items)">exhibits</a>, <a href="https://www.idea.org/blog/tag/experiences/" class="tag-cloud-link tag-link-168 tag-link-position-19" style="font-size: 23.880597014925px;" aria-label="experiences (10 items)">experiences</a>, <a href="https://www.idea.org/blog/tag/facebook/" class="tag-cloud-link tag-link-85 tag-link-position-20" style="font-size: 20px;" aria-label="Facebook (5 items)">Facebook</a>, <a href="https://www.idea.org/blog/tag/finding-information/" class="tag-cloud-link tag-link-21 tag-link-position-21" style="font-size: 20px;" aria-label="finding information (5 items)">finding information</a>, <a href="https://www.idea.org/blog/tag/free/" class="tag-cloud-link tag-link-248 tag-link-position-22" style="font-size: 21.791044776119px;" aria-label="free (7 items)">free</a>, <a href="https://www.idea.org/blog/tag/funding/" class="tag-cloud-link tag-link-149 tag-link-position-23" style="font-size: 21.044776119403px;" aria-label="funding (6 items)">funding</a>, <a href="https://www.idea.org/blog/tag/games/" class="tag-cloud-link tag-link-170 tag-link-position-24" style="font-size: 21.044776119403px;" aria-label="games (6 items)">games</a>, <a href="https://www.idea.org/blog/tag/google/" class="tag-cloud-link tag-link-74 tag-link-position-25" style="font-size: 24.477611940299px;" aria-label="google (11 items)">google</a>, <a href="https://www.idea.org/blog/tag/higher-education/" class="tag-cloud-link tag-link-61 tag-link-position-26" style="font-size: 21.044776119403px;" aria-label="higher education (6 items)">higher education</a>, <a href="https://www.idea.org/blog/tag/history/" 
class="tag-cloud-link tag-link-154 tag-link-position-27" style="font-size: 22.537313432836px;" aria-label="history (8 items)">history</a>, <a href="https://www.idea.org/blog/tag/interactivity-2/" class="tag-cloud-link tag-link-197 tag-link-position-28" style="font-size: 26.268656716418px;" aria-label="interactivity (15 items)">interactivity</a>, <a href="https://www.idea.org/blog/tag/ios/" class="tag-cloud-link tag-link-113 tag-link-position-29" style="font-size: 21.044776119403px;" aria-label="iOS (6 items)">iOS</a>, <a href="https://www.idea.org/blog/tag/ipad/" class="tag-cloud-link tag-link-108 tag-link-position-30" style="font-size: 22.537313432836px;" aria-label="IPad (8 items)">IPad</a>, <a href="https://www.idea.org/blog/tag/iphone/" class="tag-cloud-link tag-link-123 tag-link-position-31" style="font-size: 21.044776119403px;" aria-label="iPhone (6 items)">iPhone</a>, <a href="https://www.idea.org/blog/tag/learning-2/" class="tag-cloud-link tag-link-191 tag-link-position-32" style="font-size: 24.925373134328px;" aria-label="learning (12 items)">learning</a>, <a href="https://www.idea.org/blog/tag/metrics/" class="tag-cloud-link tag-link-33 tag-link-position-33" style="font-size: 20px;" aria-label="metrics (5 items)">metrics</a>, <a href="https://www.idea.org/blog/tag/mobile/" class="tag-cloud-link tag-link-71 tag-link-position-34" style="font-size: 27.761194029851px;" aria-label="mobile (19 items)">mobile</a>, <a href="https://www.idea.org/blog/tag/museum/" class="tag-cloud-link tag-link-105 tag-link-position-35" style="font-size: 30px;" aria-label="museum (27 items)">museum</a>, <a href="https://www.idea.org/blog/tag/navigation/" class="tag-cloud-link tag-link-22 tag-link-position-36" style="font-size: 20px;" aria-label="navigation (5 items)">navigation</a>, <a href="https://www.idea.org/blog/tag/online-course/" class="tag-cloud-link tag-link-295 tag-link-position-37" style="font-size: 20px;" aria-label="online course (5 items)">online course</a>, <a 
href="https://www.idea.org/blog/tag/open-access/" class="tag-cloud-link tag-link-177 tag-link-position-38" style="font-size: 21.044776119403px;" aria-label="open access (6 items)">open access</a>, <a href="https://www.idea.org/blog/tag/outreach/" class="tag-cloud-link tag-link-136 tag-link-position-39" style="font-size: 29.253731343284px;" aria-label="outreach (24 items)">outreach</a>, <a href="https://www.idea.org/blog/tag/personalization/" class="tag-cloud-link tag-link-137 tag-link-position-40" style="font-size: 21.044776119403px;" aria-label="personalization (6 items)">personalization</a>, <a href="https://www.idea.org/blog/tag/planning/" class="tag-cloud-link tag-link-139 tag-link-position-41" style="font-size: 24.925373134328px;" aria-label="planning (12 items)">planning</a>, <a href="https://www.idea.org/blog/tag/publishing/" class="tag-cloud-link tag-link-179 tag-link-position-42" style="font-size: 22.537313432836px;" aria-label="publishing (8 items)">publishing</a>, <a href="https://www.idea.org/blog/tag/smartphone/" class="tag-cloud-link tag-link-114 tag-link-position-43" style="font-size: 21.044776119403px;" aria-label="smartphone (6 items)">smartphone</a>, <a href="https://www.idea.org/blog/tag/social-media-2/" class="tag-cloud-link tag-link-51 tag-link-position-44" style="font-size: 26.716417910448px;" aria-label="social media (16 items)">social media</a>, <a href="https://www.idea.org/blog/tag/study/" class="tag-cloud-link tag-link-133 tag-link-position-45" style="font-size: 21.791044776119px;" aria-label="study (7 items)">study</a>, <a href="https://www.idea.org/blog/tag/twitter/" class="tag-cloud-link tag-link-164 tag-link-position-46" style="font-size: 22.537313432836px;" aria-label="Twitter (8 items)">Twitter</a>, <a href="https://www.idea.org/blog/tag/user-interface/" class="tag-cloud-link tag-link-20 tag-link-position-47" style="font-size: 28.358208955224px;" aria-label="user interface (21 items)">user interface</a>, <a 
href="https://www.idea.org/blog/tag/video/" class="tag-cloud-link tag-link-118 tag-link-position-48" style="font-size: 21.791044776119px;" aria-label="video (7 items)">video</a>, <a href="https://www.idea.org/blog/tag/visitors/" class="tag-cloud-link tag-link-97 tag-link-position-49" style="font-size: 25.522388059701px;" aria-label="visitors (13 items)">visitors</a>, <a href="https://www.idea.org/blog/tag/visualization/" class="tag-cloud-link tag-link-199 tag-link-position-50" style="font-size: 21.044776119403px;" aria-label="visualization (6 items)">visualization</a> </div> </div> <div class="js-ajax-frame"> <section class="post-list"> <h3>Recent posts</h3> <article class="post"> <div class="article-footer"> <time datetime="2014">5 Mar 2014</time> <h4><a href="https://www.idea.org/blog/2014/03/05/gender-role-literacy-girls-in-science/">Gender role literacy: Girls in science?</a></h4> </div> <p>There are gender wars, and then there are casualties. It wasn’t until 2011 that the behemoth toymaker LEGO acknowledged girls’ desire to build with bricks, even though the company had long before made a seemingly effortless pivot to co-branding, video games, and major motion pictures. So it’s little wonder that girls face all-too-real obstacles when <a href="https://www.idea.org/blog/2014/03/05/gender-role-literacy-girls-in-science/">[…]</a></p> <a class="more" href="https://www.idea.org/blog/2014/03/05/gender-role-literacy-girls-in-science/">Read more</a> </article> <article class="post"> <div class="article-footer"> <time datetime="2013">9 Dec 2013</time> <h4><a href="https://www.idea.org/blog/2013/12/09/challenges-of-crowdsourcing-analysis-of-historypin/">Challenges of crowdsourcing: Analysis of Historypin</a></h4> </div> <p>Crowdsourcing can build virtual community, engage the public, and build large knowledge databases about science and culture. But what does it take, and how fast can you grow? 
For some insight, we look at a crowdsourced history site: Historypin is an appealing database of historical photos, with dates, locations, captions, and other metadata. It’s called History <a href="https://www.idea.org/blog/2013/12/09/challenges-of-crowdsourcing-analysis-of-historypin/">[…]</a></p> <a class="more" href="https://www.idea.org/blog/2013/12/09/challenges-of-crowdsourcing-analysis-of-historypin/">Read more</a> </article> <article class="post"> <div class="article-footer"> <time datetime="2013">18 Nov 2013</time> <h4><a href="https://www.idea.org/blog/2013/11/18/dinovember-creative-literacy-starts-young/">Dinovember: Creative literacy starts young</a></h4> </div> <p>“Uh-oh,” Refe Tuma heard his girls whisper. “Mom and Dad are not going to like this.” It’s Dinovember, and his family’s plastic dinosaurs have been getting into mischief all month. Every year, Tuma and his wife devote the month of November to “convincing our children that, while they sleep, their plastic dinosaur figures come to life. 
Related <a href="https://www.idea.org/blog/2013/11/18/dinovember-creative-literacy-starts-young/">[…]</a></p> <a class="more" href="https://www.idea.org/blog/2013/11/18/dinovember-creative-literacy-starts-young/">Read more</a> </article> </section> </div> </div> </div> </div> </header> <main role="main" id="main"> <div class="visual bg-stretch"> <span data-srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-inbg.jpg, https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-inbg-2x.jpg 2x"></span> <span data-srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg.jpg, https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg-2x.jpg 2x" data-media="(min-width: 768px)"></span> <span data-srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg.jpg, https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg-2x.jpg 2x" data-media="(min-width: 1024px)"></span> <div class="container"> <div class="image-holder box-shadow-img"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-294x177.png, https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-588x353.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-1030x360.png, https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-1030x360.png" alt=""> </picture> </div> </div> </div> <div id="twocolumns"> <div class="container"> <h1>Our language database</h1> <div class="twocolumns-holder"> <section id="content" class="about-holder img-posts"> <article class="post"> <p><strong><img class="alignright wp-image-4524" 
src="http://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-545x351.png" alt="Screen Shot 2016-03-11 at 11.55.59 PM" width="333" height="215" srcset="https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-545x351.png 545w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-150x97.png 150w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-240x155.png 240w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-60x39.png 60w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-120x77.png 120w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-35x23.png 35w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-70x45.png 70w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-68x44.png 68w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM-136x88.png 136w, https://www.idea.org/blog/wp-content/uploads/2016/03/Screen-Shot-2016-03-11-at-11.55.59-PM.png 614w" sizes="(max-width: 333px) 100vw, 333px" /></strong>The IDEA Linguabase is a large lexicon for use in consumer-facing and natural language processing applications. Built over a period of four years by our team of programmers and lexicographers, it contains definitions and weighted word relations for over 500,000 terms, along with a data graph of over 50 million word associations.</p> <p>The database suggests related words for traditional thesaurus topics, as well as hundreds of thousands of terms that are omitted from typical thesauri. 
It covers adjectives, like “happy,” “joyful,” “cheerful,” and over 300,000 additional nouns, such as “golf club,” “iron,” and “brassie.” It contains both closely similar words, akin to synonyms and near-synonyms – think “house,” “domicile,” and “lodge” – as well as items of the same type – “house,” “bungalow,” “villa” – and more distantly associated words: “house,” “quarter,” “dwell.” All relations have a decimal weight ranging from 1, being very similar, to 0, indicating a low confidence of association.</p> <p>The motivation for Linguabase was the lack of an existing database that met the requirements for our language app projects. Describing English is notoriously expensive, requiring massive amounts of labor from highly educated, specialized talent. In 1985, Princeton began creating WordNet, an influential, large-scale open source language database. This electronic reference was first published in 1991. WordNet and related projects, like FrameNet and VerbNet, are a mainstay of natural language processing research. While WordNet is included in the IDEA Linguabase, it is of limited use as its intention is to define sets of related terms rather than act as a thesaurus.</p> <h3>Project activities</h3> <p>The IDEA Linguabase combines several publicly available sources in a unique way, and adds our own lexicographic work.</p> <p>Some steps in our process required an intense amount of computing time and power. A single desktop computer could process each pool of text in 5 to 10 minutes, not including testing and refinements; it would have taken over a decade of computing time to analyze all the words. To accelerate this process, we used hundreds of thousands of hours of supercomputer power from the NSF-funded Extreme Science and Engineering Discovery Environment (XSEDE), grant #IRI130011.
Using their supercomputer, we were able to spread this workload across thousands of processing cores, yielding over 30 million ranked word relationships in a matter of days.</p> <p>The heart of the database is a word list, with definitions, based on crowdsourced content. We included words, compound words, and idioms from Wiktionary, as well as major encyclopedic terms from Wikipedia, to create our unabridged dictionary.</p> <p>We then analyzed several dozen free, open source, and commercial thesauri, including WordNet, the NASA Thesaurus, and data from the National Library of Medicine and the Library of Congress. These sources helped us find over a million word relationships.</p> <p>We sought to capture a foundation of broader associations, like the connection between “horse” and “stable” or “cat” and “meow.” To do this, we built a large corpus of English prose from multiple genres. For each of our terms, we extracted a pool of matching sentences and paragraphs. We then used topic modeling to discover abstract topics in collections of text. These topic models examined the statistics of words in each collection, revealing clusters of words likely to appear together.</p> <p>In addition to natural language processing, we conducted new lexicographic work focused on cultural expansiveness. We defined thousands of groups of related terms, from denominations of Christianity to human bones to high-pitched sounds, that go beyond synonyms. We identified thousands of definitions and relationships for the most common words in English (so-called stopwords), which are typically omitted from word databases. We made a comprehensive list of vulgar and offensive terms, and identified thousands of antonym pairs. We subjected our work to intense editorial review to catch errors from the natural language processing, such as those caused by compound words (e.g., “New York,” which was broken into “New” and “York”) or errors from faulty optical character recognition. 
This work expanded the word relationships for more than 80,000 terms, while also fixing thousands of errors.</p> <p>In order to identify words that are often used together (so-called collocations), we built on the data provided by Google’s NGrams project, which is their analysis of data from over 5 million scanned books. We identified common usages and provided words that typically precede and follow a given term.</p> <p>We analyzed phonetic sounds to produce rhymes, identify word families, and generate various forms of wordplay, such as words with common starting or ending letters, words whose letters appear as part of or contain other words, and words with curious letter patterns.</p> <p>Finally, we used artificial intelligence for fine-tuning and to create validation datasets used to build on current and future generations of generative AI.</p> <p>Our research created the Linguabase engine for our language-related mobile apps, but we are also interested in ideas that other developers or publishers may have for our database.</p> </article> </section> <aside id="sidebar"> <section class="widget"> <article class="post"> <p>The Linguabase is available for licensing for use in apps, web sites, cloud services, and other applications that require a large thesaurus of weighted word relationships.</p> <div><a href="/contact/"> Contact us to discuss licensing <i class="icon-arrow-right-alt1"></i></a></div> </article> <article class="post"> <p>The Linguabase powers <a href="/language-apps/">our apps</a>.</p> </article> <article class="post"> <p> Travel through a galaxy of words in this matching game.</p> <div><a href="/otherwordly/"> Go to OtherWordly <i class="icon-arrow-right-alt1"></i></a></div> </article> <article class="post"> <p> Delve into words, definitions, and real-world usage examples.</p> <div><a href="/inotherwords/"> Go to In Other Words <i class="icon-arrow-right-alt1"></i></a></div> </article> </section> </aside> </div> </div> </div> </main> <aside
class="items-holder"> <div class="container"> <div class="list-items widget-apps-widget"><h3>Read more about <br class="separator"><strong>our language-related apps.</strong></h3> <ul> <li> <div class="item-holder"> <a href="http://www.idea.org/otherwordly"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-35x36.png, https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-70x70.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-70x70.png, https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-140x140.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-70x70.png" alt=""> </picture> <span class="title" style="color:#c46e3c">OtherWordly</span> </a> </div> </li> <li> <div class="hr"></div> <div class="item-holder"> <a href="http://www.idea.org/inotherwords"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-35x36.png, https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-70x70.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-70x70.png, https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-140x140.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-70x70.png" alt=""> </picture> <span class="title" style="color:#8b8be0">In Other Words</span> </a> </div> </li> <li> <div class="hr"></div> <div class="item-holder"> <a 
href="http://www.idea.org/linguabase"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-35x36.png, https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-70x70.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-70x70.png, https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-140x140.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-70x70.png" alt=""> </picture> <span class="title" style="color:#b859cc">Linguabase</span> </a> </div> </li> </ul> </div> </div> </aside> <footer id="footer"> <div class="container"> <nav class="page-nav"> <ul> <li id="menu-item-4498" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-home menu-item-4498"><a href="https://www.idea.org/blog/">Home</a></li> <li id="menu-item-4497" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-4497"><a href="https://www.idea.org/blog/about/">Mission &amp; history</a></li> <li id="menu-item-4499" class="popup menu-item menu-item-type-custom menu-item-object-custom menu-item-4499"><a title="popup" class="js-btn-show-popups" href="#wrapper">Browse topics</a></li> <li id="menu-item-4666" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-4666"><a href="https://www.idea.org/blog/contact/">Contact IDEA</a></li> <li>Copyright © 2024 <a href="https://www.idea.org/blog">IDEA</a></li> </ul> </nav> </div> </footer> </div> <link rel='stylesheet' id='yarppRelatedCss-css' href='//www.idea.org/blog/wp-content/plugins/yet-another-related-posts-plugin/style/related.css' type='text/css' media='all' /> <script type='text/javascript'> /* <![CDATA[ */ var wpcf7 =
{"apiSettings":{"root":"https:\/\/www.idea.org\/blog\/wp-json\/contact-form-7\/v1","namespace":"contact-form-7\/v1"},"cached":"1"}; /* ]]> */ </script> <script type='text/javascript' src='//www.idea.org/blog/wp-content/plugins/contact-form-7/includes/js/scripts.js'></script> <script type='text/javascript' src='//www.idea.org/blog/wp-content/themes/ideaorg/js/jquery.main.js'></script> <script type='text/javascript' src='//www.idea.org/blog/wp-includes/js/wp-embed.min.js'></script> </body> <!-- Google tag (gtag.js) --> <script async src="https://www.googletagmanager.com/gtag/js?id=G-Y68B45MT0H"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'G-Y68B45MT0H'); </script> </html> <!-- Performance optimized by W3 Total Cache. Learn more: https://www.w3-edge.com/products/ Object Caching 1957/117 objects using disk Page Caching using disk: enhanced Database Caching 2/111 queries in 0.021 seconds using disk Served from: www.idea.org @ 2024-11-24 13:05:55 by W3 Total Cache -->