CINXE.COM

Mapping Language: The IDEA Linguabase – IDEA

<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="Content-Security-Policy" content="upgrade-insecure-requests">
<!-- Global pathInfo: absolute base URL of the ideaorg theme plus the relative css/js/swf folder names. Presumably read by theme scripts loaded elsewhere; confirm before renaming any key. -->
<script type="text/javascript"> var pathInfo = { base: 'https://www.idea.org/blog/wp-content/themes/ideaorg/', css: 'css/', js: 'js/', swf: 'swf/', } </script>
<title>Mapping Language: The IDEA Linguabase &#8211; IDEA</title>
<link rel='dns-prefetch' href='//s.w.org' />
<link rel="alternate" type="application/rss+xml" title="IDEA &raquo; Feed" href="https://www.idea.org/blog/feed/" />
<link rel="alternate" type="application/rss+xml" title="IDEA &raquo; Comments Feed" href="https://www.idea.org/blog/comments/feed/" />
<!-- Emoji support probe. The script draws flag and emoji code-point sequences on a canvas, once as written and once with zero-width spaces (8203) inserted, and compares the two data URLs to fill window._wpemojiSettings.supports. When full support is not detected it appends the "concatemoji" script (or twemoji plus wpemoji) to the head element. Keep the whole script on a single line: a line break right after a "return" keyword makes the JavaScript parser end the statement there, so the flag test would yield undefined and the fallback script would always be loaded. -->
<script type="text/javascript"> window._wpemojiSettings = {"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/11\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/11\/svg\/","svgExt":".svg","source":{"concatemoji":"\/\/www.idea.org\/blog\/wp-includes\/js\/wp-emoji-release.min.js?ver=4.9.26"}}; !function(e,a,t){var n,r,o,i=a.createElement("canvas"),p=i.getContext&&i.getContext("2d");function s(e,t){var a=String.fromCharCode;p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,e),0,0);e=i.toDataURL();return p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,t),0,0),e===i.toDataURL()}function c(e){var t=a.createElement("script");t.src=e,t.defer=t.type="text/javascript",a.getElementsByTagName("head")[0].appendChild(t)}for(o=Array("flag","emoji"),t.supports={everything:!0,everythingExceptFlag:!0},r=0;r<o.length;r++)t.supports[o[r]]=function(e){if(!p||!p.fillText)return!1;switch(p.textBaseline="top",p.font="600 32px Arial",e){case"flag":return s([55356,56826,55356,56819],[55356,56826,8203,55356,56819])?!1:!s([55356,57332,56128,56423,56128,56418,56128,56421,56128,56430,56128,56423,56128,56447],[55356,57332,8203,56128,56423,8203,56128,56418,8203,56128,56421,8203,56128,56430,8203,56128,56423,8203,56128,56447]);case"emoji":return!s([55358,56760,9792,65039],[55358,56760,8203,9792,65039])}return!1}(o[r]),t.supports.everything=t.supports.everything&&t.supports[o[r]],"flag"!==o[r]&&(t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&t.supports[o[r]]);t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&!t.supports.flag,t.DOMReady=!1,t.readyCallback=function(){t.DOMReady=!0},t.supports.everything||(n=function(){t.readyCallback()},a.addEventListener?(a.addEventListener("DOMContentLoaded",n,!1),e.addEventListener("load",n,!1)):(e.attachEvent("onload",n),a.attachEvent("onreadystatechange",function(){"complete"===a.readyState&&t.readyCallback()})),(n=t.source||{}).concatemoji?c(n.concatemoji):n.wpemoji&&n.twemoji&&(c(n.twemoji),c(n.wpemoji)))}(window,document,window._wpemojiSettings); </script>
<!-- Forces emoji images (img.wp-smiley, img.emoji) to render inline at 1em with no border, shadow, background or padding. -->
<style type="text/css"> img.wp-smiley, img.emoji { display: inline !important; border: none !important; box-shadow: none !important; height: 1em !important; width: 1em !important; margin: 0 .07em !important; vertical-align: -0.1em !important; background: none !important; padding: 0 !important; } </style>
<link rel='stylesheet' id='yarppWidgetCss-css' href='//www.idea.org/blog/wp-content/plugins/yet-another-related-posts-plugin/style/widget.css' type='text/css' media='all' />
<link rel='stylesheet' id='contact-form-7-css' href='//www.idea.org/blog/wp-content/plugins/contact-form-7/includes/css/styles.css' type='text/css' media='all' />
<link rel='stylesheet' id='base-style-css' href='//www.idea.org/blog/wp-content/themes/ideaorg/style.css' type='text/css' media='all' />
<link rel='stylesheet' id='base-theme-css' href='//www.idea.org/blog/wp-content/themes/ideaorg/theme.css' type='text/css' media='all'
/> <script type='text/javascript' src='//www.idea.org/blog/wp-includes/js/jquery/jquery.js'></script> <script type='text/javascript' src='//www.idea.org/blog/wp-includes/js/jquery/jquery-migrate.min.js'></script> <link rel='https://api.w.org/' href='https://www.idea.org/blog/wp-json/' /> <link rel="EditURI" type="application/rsd+xml" title="RSD" href="https://www.idea.org/blog/xmlrpc.php?rsd" /> <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="https://www.idea.org/blog/wp-includes/wlwmanifest.xml" /> <link rel="canonical" href="https://www.idea.org/blog/linguabase/" /> <link rel='shortlink' href='https://www.idea.org/blog/?p=4523' /> <link rel="alternate" type="application/json+oembed" href="https://www.idea.org/blog/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fwww.idea.org%2Fblog%2Flinguabase%2F" /> <link rel="alternate" type="text/xml+oembed" href="https://www.idea.org/blog/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fwww.idea.org%2Fblog%2Flinguabase%2F&#038;format=xml" /> </head> <body class="inner page page-id-4523"> <div id="wrapper" class="js-ajax-holder"> <header id="header"> <div class="container"> <div class="logo"> <a href="https://www.idea.org/blog"><img src="https://www.idea.org/blog/wp-content/themes/ideaorg/images/logo.png" alt="IDEA"></a> </div> <div class="search-block"> <a href="#" class="opener"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search-2x.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/search-2x.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/themes/ideaorg/images/search.png" alt="search"> </picture> </a> <div class="form-search"> <form action="https://www.idea.org/blog" method="get" class="search js-search-form"> <fieldset> <input
id="search" name="s" type="search" placeholder="Type your search..." value="" aria-label="Search"> <button class="search-btn" type="submit" value=""> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-search-2x.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/search.png, https://www.idea.org/blog/wp-content/themes/ideaorg/images/search-2x.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/themes/ideaorg/images/search.png" alt="search"> </picture> </button> </fieldset> </form> <a class="opener2" href="#">Browse Topics</a> </div> </div> </div> <div class="topics-page"> <div class="container"> <div class="topics-holder"> <a href="#" class="closer"><span></span></a> <div class="widget widget_tag_cloud"> <h3>Other topics</h3> <div class="tagcloud js-ajax-links-holder"> <a href="https://www.idea.org/blog/tag/access/" class="tag-cloud-link tag-link-222 tag-link-position-1" style="font-size: 28.358208955224px;" aria-label="access (21 items)">access</a>, <a href="https://www.idea.org/blog/tag/android/" class="tag-cloud-link tag-link-112 tag-link-position-2" style="font-size: 21.791044776119px;" aria-label="Android (7 items)">Android</a>, <a href="https://www.idea.org/blog/tag/apple/" class="tag-cloud-link tag-link-110 tag-link-position-3" style="font-size: 26.268656716418px;" aria-label="Apple (15 items)">Apple</a>, <a href="https://www.idea.org/blog/tag/apps/" class="tag-cloud-link tag-link-169 tag-link-position-4" style="font-size: 25.970149253731px;" aria-label="apps (14 items)">apps</a>, <a href="https://www.idea.org/blog/tag/art/" class="tag-cloud-link tag-link-107 tag-link-position-5" style="font-size: 23.880597014925px;" aria-label="art (10 items)">art</a>, <a href="https://www.idea.org/blog/tag/audience/" class="tag-cloud-link tag-link-45 tag-link-position-6"
style="font-size: 28.358208955224px;" aria-label="Audience (21 items)">Audience</a>, <a href="https://www.idea.org/blog/tag/blog/" class="tag-cloud-link tag-link-87 tag-link-position-7" style="font-size: 23.283582089552px;" aria-label="blog (9 items)">blog</a>, <a href="https://www.idea.org/blog/tag/classroom/" class="tag-cloud-link tag-link-188 tag-link-position-8" style="font-size: 21.791044776119px;" aria-label="classroom (7 items)">classroom</a>, <a href="https://www.idea.org/blog/tag/community/" class="tag-cloud-link tag-link-26 tag-link-position-9" style="font-size: 25.970149253731px;" aria-label="community (14 items)">community</a>, <a href="https://www.idea.org/blog/tag/content-management/" class="tag-cloud-link tag-link-39 tag-link-position-10" style="font-size: 25.522388059701px;" aria-label="content management (13 items)">content management</a>, <a href="https://www.idea.org/blog/tag/costs/" class="tag-cloud-link tag-link-70 tag-link-position-11" style="font-size: 21.044776119403px;" aria-label="costs (6 items)">costs</a>, <a href="https://www.idea.org/blog/tag/culture/" class="tag-cloud-link tag-link-217 tag-link-position-12" style="font-size: 22.537313432836px;" aria-label="culture (8 items)">culture</a>, <a href="https://www.idea.org/blog/tag/data/" class="tag-cloud-link tag-link-38 tag-link-position-13" style="font-size: 27.164179104478px;" aria-label="Data (17 items)">Data</a>, <a href="https://www.idea.org/blog/tag/dissemination/" class="tag-cloud-link tag-link-134 tag-link-position-14" style="font-size: 25.970149253731px;" aria-label="dissemination (14 items)">dissemination</a>, <a href="https://www.idea.org/blog/tag/e-learning/" class="tag-cloud-link tag-link-81 tag-link-position-15" style="font-size: 22.537313432836px;" aria-label="e-learning (8 items)">e-learning</a>, <a href="https://www.idea.org/blog/tag/education/" class="tag-cloud-link tag-link-192 tag-link-position-16" style="font-size: 26.716417910448px;" aria-label="education (16 
items)">education</a>, <a href="https://www.idea.org/blog/tag/evaluation/" class="tag-cloud-link tag-link-135 tag-link-position-17" style="font-size: 26.268656716418px;" aria-label="evaluation (15 items)">evaluation</a>, <a href="https://www.idea.org/blog/tag/exhibits/" class="tag-cloud-link tag-link-53 tag-link-position-18" style="font-size: 24.925373134328px;" aria-label="exhibits (12 items)">exhibits</a>, <a href="https://www.idea.org/blog/tag/experiences/" class="tag-cloud-link tag-link-168 tag-link-position-19" style="font-size: 23.880597014925px;" aria-label="experiences (10 items)">experiences</a>, <a href="https://www.idea.org/blog/tag/facebook/" class="tag-cloud-link tag-link-85 tag-link-position-20" style="font-size: 20px;" aria-label="Facebook (5 items)">Facebook</a>, <a href="https://www.idea.org/blog/tag/finding-information/" class="tag-cloud-link tag-link-21 tag-link-position-21" style="font-size: 20px;" aria-label="finding information (5 items)">finding information</a>, <a href="https://www.idea.org/blog/tag/free/" class="tag-cloud-link tag-link-248 tag-link-position-22" style="font-size: 21.791044776119px;" aria-label="free (7 items)">free</a>, <a href="https://www.idea.org/blog/tag/funding/" class="tag-cloud-link tag-link-149 tag-link-position-23" style="font-size: 21.044776119403px;" aria-label="funding (6 items)">funding</a>, <a href="https://www.idea.org/blog/tag/games/" class="tag-cloud-link tag-link-170 tag-link-position-24" style="font-size: 21.044776119403px;" aria-label="games (6 items)">games</a>, <a href="https://www.idea.org/blog/tag/google/" class="tag-cloud-link tag-link-74 tag-link-position-25" style="font-size: 24.477611940299px;" aria-label="google (11 items)">google</a>, <a href="https://www.idea.org/blog/tag/higher-education/" class="tag-cloud-link tag-link-61 tag-link-position-26" style="font-size: 21.044776119403px;" aria-label="higher education (6 items)">higher education</a>, <a href="https://www.idea.org/blog/tag/history/" 
class="tag-cloud-link tag-link-154 tag-link-position-27" style="font-size: 22.537313432836px;" aria-label="history (8 items)">history</a>, <a href="https://www.idea.org/blog/tag/interactivity-2/" class="tag-cloud-link tag-link-197 tag-link-position-28" style="font-size: 26.268656716418px;" aria-label="interactivity (15 items)">interactivity</a>, <a href="https://www.idea.org/blog/tag/ios/" class="tag-cloud-link tag-link-113 tag-link-position-29" style="font-size: 21.044776119403px;" aria-label="iOS (6 items)">iOS</a>, <a href="https://www.idea.org/blog/tag/ipad/" class="tag-cloud-link tag-link-108 tag-link-position-30" style="font-size: 22.537313432836px;" aria-label="IPad (8 items)">IPad</a>, <a href="https://www.idea.org/blog/tag/iphone/" class="tag-cloud-link tag-link-123 tag-link-position-31" style="font-size: 21.044776119403px;" aria-label="iPhone (6 items)">iPhone</a>, <a href="https://www.idea.org/blog/tag/learning-2/" class="tag-cloud-link tag-link-191 tag-link-position-32" style="font-size: 24.925373134328px;" aria-label="learning (12 items)">learning</a>, <a href="https://www.idea.org/blog/tag/metrics/" class="tag-cloud-link tag-link-33 tag-link-position-33" style="font-size: 20px;" aria-label="metrics (5 items)">metrics</a>, <a href="https://www.idea.org/blog/tag/mobile/" class="tag-cloud-link tag-link-71 tag-link-position-34" style="font-size: 27.761194029851px;" aria-label="mobile (19 items)">mobile</a>, <a href="https://www.idea.org/blog/tag/museum/" class="tag-cloud-link tag-link-105 tag-link-position-35" style="font-size: 30px;" aria-label="museum (27 items)">museum</a>, <a href="https://www.idea.org/blog/tag/navigation/" class="tag-cloud-link tag-link-22 tag-link-position-36" style="font-size: 20px;" aria-label="navigation (5 items)">navigation</a>, <a href="https://www.idea.org/blog/tag/online-course/" class="tag-cloud-link tag-link-295 tag-link-position-37" style="font-size: 20px;" aria-label="online course (5 items)">online course</a>, <a 
href="https://www.idea.org/blog/tag/open-access/" class="tag-cloud-link tag-link-177 tag-link-position-38" style="font-size: 21.044776119403px;" aria-label="open access (6 items)">open access</a>, <a href="https://www.idea.org/blog/tag/outreach/" class="tag-cloud-link tag-link-136 tag-link-position-39" style="font-size: 29.253731343284px;" aria-label="outreach (24 items)">outreach</a>, <a href="https://www.idea.org/blog/tag/personalization/" class="tag-cloud-link tag-link-137 tag-link-position-40" style="font-size: 21.044776119403px;" aria-label="personalization (6 items)">personalization</a>, <a href="https://www.idea.org/blog/tag/planning/" class="tag-cloud-link tag-link-139 tag-link-position-41" style="font-size: 24.925373134328px;" aria-label="planning (12 items)">planning</a>, <a href="https://www.idea.org/blog/tag/publishing/" class="tag-cloud-link tag-link-179 tag-link-position-42" style="font-size: 22.537313432836px;" aria-label="publishing (8 items)">publishing</a>, <a href="https://www.idea.org/blog/tag/smartphone/" class="tag-cloud-link tag-link-114 tag-link-position-43" style="font-size: 21.044776119403px;" aria-label="smartphone (6 items)">smartphone</a>, <a href="https://www.idea.org/blog/tag/social-media-2/" class="tag-cloud-link tag-link-51 tag-link-position-44" style="font-size: 26.716417910448px;" aria-label="social media (16 items)">social media</a>, <a href="https://www.idea.org/blog/tag/study/" class="tag-cloud-link tag-link-133 tag-link-position-45" style="font-size: 21.791044776119px;" aria-label="study (7 items)">study</a>, <a href="https://www.idea.org/blog/tag/twitter/" class="tag-cloud-link tag-link-164 tag-link-position-46" style="font-size: 22.537313432836px;" aria-label="Twitter (8 items)">Twitter</a>, <a href="https://www.idea.org/blog/tag/user-interface/" class="tag-cloud-link tag-link-20 tag-link-position-47" style="font-size: 28.358208955224px;" aria-label="user interface (21 items)">user interface</a>, <a 
href="https://www.idea.org/blog/tag/video/" class="tag-cloud-link tag-link-118 tag-link-position-48" style="font-size: 21.791044776119px;" aria-label="video (7 items)">video</a>, <a href="https://www.idea.org/blog/tag/visitors/" class="tag-cloud-link tag-link-97 tag-link-position-49" style="font-size: 25.522388059701px;" aria-label="visitors (13 items)">visitors</a>, <a href="https://www.idea.org/blog/tag/visualization/" class="tag-cloud-link tag-link-199 tag-link-position-50" style="font-size: 21.044776119403px;" aria-label="visualization (6 items)">visualization</a> </div> </div> <div class="js-ajax-frame"> <section class="post-list"> <h3>Recent posts</h3> <article class="post"> <div class="article-footer"> <time datetime="2017-02-14">14 Feb 2017</time> <h4><a href="https://www.idea.org/blog/2017/02/14/made-a-splash/">OtherWordly Makes Splash at Takoma Park Play Day</a></h4> </div> <p>Community Embraces New Word Game at Mid-Year Play Day This past Sunday, families at Takoma Park&#8217;s Seventh Annual Mid-Year Play Day had the opportunity to experience OtherWordly for the first time. Our educational language game drew curious children and parents to our table throughout the afternoon. Words in Space Several children gathered around our iPads <a href="https://www.idea.org/blog/2017/02/14/made-a-splash/">[&#8230;]</a></p> <a class="more" href="https://www.idea.org/blog/2017/02/14/made-a-splash/">Read more</a> </article> <article class="post"> <div class="article-footer"> <time datetime="2017-02-09">9 Feb 2017</time> <h4><a href="https://www.idea.org/blog/2017/02/09/lets-play-day/">Play OtherWordly at Takoma Park Play Day This Sunday!</a></h4> </div> <p>Play with the meanings of words, in space! Come by 7500 Maple Ave on Sunday afternoon, at the Takoma Park Community Center (next to the library) for the Seventh Annual Mid-Year Play Day. We&#8217;re excited to introduce OtherWordly to our local community!
What to Expect As part of IDEA&#8217;s commitment to childhood literacy, OtherWordly helps <a href="https://www.idea.org/blog/2017/02/09/lets-play-day/">[&#8230;]</a></p> <a class="more" href="https://www.idea.org/blog/2017/02/09/lets-play-day/">Read more</a> </article> <article class="post"> <div class="article-footer"> <time datetime="2014-03-05">5 Mar 2014</time> <h4><a href="https://www.idea.org/blog/2014/03/05/gender-role-literacy-girls-in-science/">Gender role literacy: Girls in science?</a></h4> </div> <p>There are gender wars, and then there are casualties. It wasn’t until 2011 that the behemoth toymaker LEGO acknowledged girls’ desire to build with bricks, even though the company had long before made a seemingly effortless pivot to co-branding, video games, and major motion pictures. So it’s little wonder that girls face all-too-real obstacles when <a href="https://www.idea.org/blog/2014/03/05/gender-role-literacy-girls-in-science/">[&#8230;]</a></p> <a class="more" href="https://www.idea.org/blog/2014/03/05/gender-role-literacy-girls-in-science/">Read more</a> </article> </section> </div> </div> </div> </div> </header> <main role="main" id="main"> <div class="visual bg-stretch"> <span data-srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-inbg.jpg, https://www.idea.org/blog/wp-content/themes/ideaorg/images/small-inbg-2x.jpg 2x"></span> <span data-srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg.jpg, https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg-2x.jpg 2x" data-media="(min-width: 768px)"></span> <span data-srcset="https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg.jpg, https://www.idea.org/blog/wp-content/themes/ideaorg/images/inbg-2x.jpg 2x" data-media="(min-width: 1024px)"></span> <div class="container"> <div class="image-holder box-shadow-img"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source
srcset="https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-294x177.png, https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-588x353.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-1030x360.png, https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2016/03/idea_website_visuals_linguabase_01-1030x360.png" alt=""> </picture> </div> </div> </div> <div id="twocolumns"> <div class="container"> <h1>Mapping Language: The IDEA Linguabase</h1> <div class="twocolumns-holder"> <section id="content" class="about-holder img-posts"> <article class="post"> <p class="whitespace-pre-wrap break-words">The IDEA Linguabase combines traditional lexicography with modern data processing and large language models. With 1.1 million headwords connected by 60 million weighted relationships, this language database extends well beyond traditional reference works. This unprecedented scale reflects our inclusion of everyday objects, multi-word phrases, and encyclopedic terms—not just the abstract concepts found in standard thesauri.</p> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Beyond Traditional Thesauri</h2> <p class="whitespace-pre-wrap break-words">Traditional thesauri serve as &#8220;synonym dictionaries&#8221; – references where writers find different words with similar meanings. 
These works typically focus on abstract concepts, emotions, actions, and qualities rather than concrete objects, for practical reasons:</p> <ol class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-decimal space-y-1.5 pl-7"> <li class="whitespace-normal break-words">Writers need synonyms for verbs, adjectives, and abstract nouns more than for concrete objects</li> <li class="whitespace-normal break-words">Abstract terms like &#8220;applause&#8221; connect naturally to many related concepts (acclaim, ovation, praise), while concrete nouns like &#8220;apple&#8221; have fewer true synonyms</li> <li class="whitespace-normal break-words">Physical thesauri faced space constraints, forcing editors to prioritize frequently needed alternatives</li> </ol> <p class="whitespace-pre-wrap break-words">Linguabase breaks from this tradition in several ways:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words"><strong>Comprehensive coverage</strong>: Unlike traditional thesauri that might omit everyday objects (containing &#8220;applause&#8221; and &#8220;appliance&#8221; but not &#8220;apple&#8221; or &#8220;apple pie&#8221;), Linguabase includes all words, including concrete nouns, specialized terminology, common objects, and thousands of encyclopedic proper nouns</li> <li class="whitespace-normal break-words"><strong>Multiple relationship types</strong>: Beyond synonyms and antonyms, Linguabase maps: <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words"><strong>Similar meanings</strong>: Words with close semantic relationships like &#8220;house,&#8221; &#8220;domicile,&#8221; and &#8220;lodge&#8221;</li> <li class="whitespace-normal break-words"><strong>Category members</strong>: Items of the same type such as &#8220;house,&#8221; &#8220;bungalow,&#8221; and 
&#8220;villa&#8221;</li> <li class="whitespace-normal break-words"><strong>Associative relationships</strong>: Words contextually related like &#8220;house,&#8221; &#8220;quarter,&#8221; and &#8220;dwell&#8221;</li> </ul> </li> <li class="whitespace-normal break-words"><strong>Weighted connections</strong>: Each relationship carries a decimal score (scores above 1 indicate strong correlation; scores between 0 and 1 represent lower confidence associations)</li> </ul> <p class="whitespace-pre-wrap break-words">This approach provides an average of 60 semantically connected words for each headword across all parts of speech, covering multiple senses and contextual usages that traditional reference works typically omit.</p> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Comprehensive Linguistic Coverage</h2> <p class="whitespace-pre-wrap break-words">Linguabase distinguishes itself through four key advantages:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words"><strong>Unparalleled scale</strong>: 1.1 million headwords far exceed traditional lexical databases like Princeton&#8217;s WordNet (1985)</li> <li class="whitespace-normal break-words"><strong>Extensive relationship network</strong>: 60 million weighted connections provide structure that large language models lack</li> <li class="whitespace-normal break-words"><strong>Multiple relationship types</strong>: Coverage extends beyond synonyms to include categorical, contextual, and associative connections</li> <li class="whitespace-normal break-words"><strong>Human oversight with AI enhancement</strong>: Human-curated content augmented by artificial intelligence</li> </ul> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Multi-Sense Representation</h2> <p class="whitespace-pre-wrap break-words">A crucial feature of Linguabase is its handling of words with multiple meanings:</p> <ul 
class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words"><strong>Double meanings</strong> (technically called &#8220;homographs&#8221;): Words spelled identically but with entirely different meanings, often with different origins and pronunciations. English contains approximately 1,000-3,000 of these. <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">Example: &#8220;bass&#8221; (low sound) vs. &#8220;bass&#8221; (type of fish)</li> <li class="whitespace-normal break-words">Example: &#8220;tear&#8221; (drop from eye) vs. &#8220;tear&#8221; (to rip)</li> </ul> </li> <li class="whitespace-normal break-words"><strong>Related meanings</strong> (technically called &#8220;polysemes&#8221;): Words with multiple distinct but connected definitions that have evolved from the same root, typically appearing as separate numbered entries in dictionaries <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">Example: &#8220;head&#8221; (body part, leader of organization, front of ship)</li> <li class="whitespace-normal break-words">Example: &#8220;branch&#8221; (tree limb, division of organization)</li> <li class="whitespace-normal break-words">Example: &#8220;hiking&#8221; (walking on trails for recreation) vs. 
&#8220;hiking&#8221; (forcefully moving something upward, as in &#8220;hiking prices&#8221;)</li> </ul> </li> <li class="whitespace-normal break-words"><strong>Contextual flavors</strong>: Different aspects or dimensions of the same meaning that emphasize different connotations depending on context <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">Example: &#8220;hiking&#8221; as recreation can emphasize either nature aspects (outdoors, scenery, wildlife) or exercise aspects (exertion, fitness, calorie-burning)</li> <li class="whitespace-normal break-words">Example: &#8220;coffee&#8221; as a beverage vs. a social ritual</li> <li class="whitespace-normal break-words">Example: &#8220;reading&#8221; as education vs. entertainment</li> </ul> </li> </ul> <p class="whitespace-pre-wrap break-words">Unlike polysemes, which have distinct definitions, contextual flavors describe how words activate different associative networks while retaining the same core meaning. This distinction captures how people actually use and understand language in everyday contexts.</p> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Building the Database: Four Knowledge Sources</h2> <p class="whitespace-pre-wrap break-words">The creation of Linguabase involved four complementary knowledge sources that feed into an amalgamated scoring system:</p> <h3 class="text-lg font-bold text-text-200 mt-1 -mb-1.5">1.
Reference Integration</h3> <p class="whitespace-pre-wrap break-words">We analyzed over 70 distinct lexicographic resources, including: <em>Wiktionary</em>, <em>WordNet</em>, <em>Getty Art &amp; Architecture Thesaurus</em>, <em>AGROVOC Thesaurus</em>, <em>Library of Congress Subject Headings</em>, <em>NASA Thesaurus</em>, <em>National Library of Medicine&#8217;s UMLS Metathesaurus</em>, <em>USDA National Agricultural Library Thesaurus</em>, <em>Moby Thesaurus II</em>, <em>Roget&#8217;s Thesaurus</em> variants, and the <em>Ethnographic Thesaurus</em>.</p> <p class="whitespace-pre-wrap break-words">This integration process combined relationships from multiple sources, with repeated occurrences of a relationship across multiple sources naturally boosting its confidence weight.</p> <h3 class="text-lg font-bold text-text-200 mt-1 -mb-1.5">2. Topic Modeling</h3> <p class="whitespace-pre-wrap break-words">To capture broader associations between words, we applied topic modeling to extensive collections of English prose. For each term, we extracted matching sentences and paragraphs, then used Latent Dirichlet Allocation—a statistical method that discovers abstract topics as collections of words that frequently appear together—to identify approximately 8 abstract topics per analysis. This computation-intensive analysis required supercomputing resources from the NSF-funded Extreme Science and Engineering Discovery Environment (XSEDE). The distributed processing allowed us to analyze thousands of text samples and generate millions of weighted word relationships in days rather than years.</p> <h3 class="text-lg font-bold text-text-200 mt-1 -mb-1.5">3. Structured Word Groups</h3> <p class="whitespace-pre-wrap break-words">Linguistics graduate students created over 10,000 word groups based on Library of Congress categories. 
These categories are significant because they were designed to organize millions of books written by countless authors throughout history, inherently reflecting the vast range of topics that writers have ever wanted to discuss.</p> <p class="whitespace-pre-wrap break-words">The Library of Congress classification system uses a hierarchical structure, beginning with broad parent categories (examples below) that branch into thousands of highly specific subcategories:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">AC: Collections, Series, Collected works</li> <li class="whitespace-normal break-words">BF: Psychology, Parapsychology, Occult sciences</li> <li class="whitespace-normal break-words">GN: Anthropology, Ethnology, Folklore</li> <li class="whitespace-normal break-words">HQ: Family, Marriage, Women, Sexuality</li> <li class="whitespace-normal break-words">KF: United States federal law</li> </ul> <p class="whitespace-pre-wrap break-words">For example, under &#8220;QK&#8221; (Botany), our word groups included specific concepts like QK495 (Classification of plants as angiosperms) and QK917 (Plant ecology and carnivorous plants). Similarly, within &#8220;VM&#8221; (Naval architecture), subcategories like VM156 (Shipbuilding materials), VM311 (Hull design), and VM747 (Marine engines) each generated multiple specialized vocabulary sets.</p> <p class="whitespace-pre-wrap break-words">This approach provided domain-specific vocabulary coverage across all fields of knowledge, including specialized terminology and high-frequency terms typically omitted from reference works.</p> <h3 class="text-lg font-bold text-text-200 mt-1 -mb-1.5">4.
Large Language Model Enhancement</h3> <p class="whitespace-pre-wrap break-words">Linguabase uses advanced large language models to supplement the existing structured data in several crucial ways:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words"><strong>Expanding coverage for everyday terms</strong>: These models excel at generating rich associations for common terms like &#8220;apple pie&#8221; that lack substantial coverage in traditional reference works but evoke strong connections for most speakers</li> <li class="whitespace-normal break-words"><strong>Handling morphological variations</strong>: While traditional thesauri might contain &#8220;apply&#8221; but not &#8220;applies,&#8221; language models help generate complete paradigms across different parts of speech and inflected forms</li> <li class="whitespace-normal break-words"><strong>Identifying contextual flavors</strong>: Modern language models proved essential for recognizing and mapping the subtle connotative dimensions of words in different contexts</li> <li class="whitespace-normal break-words"><strong>Managing capitalization distinctions</strong>: These systems effectively differentiate between terms like &#8220;China&#8221; (country) vs. &#8220;china&#8221; (porcelain) or &#8220;Trump&#8221; (surname) vs. 
&#8220;trump&#8221; (card game), and correctly capitalize terms within the data graph</li> <li class="whitespace-normal break-words"><strong>Processing compound words</strong>: Advanced models naturally handle multi-word expressions like &#8220;New York&#8221; or &#8220;department store&#8221; without the parsing difficulties that traditional computational approaches often encounter</li> <li class="whitespace-normal break-words"><strong>Optimizing relationship rankings</strong>: In applications where only a few &#8220;best&#8221; word relations can be displayed, language models improved the prioritization of the most relevant connections</li> </ul> <p class="whitespace-pre-wrap break-words">This strategic use of large language models enhances areas where traditional lexicographic approaches have limitations while preserving the reliability of human-curated content for core semantic relationships.</p> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Data Processing and Weights</h2> <p class="whitespace-pre-wrap break-words">Linguabase employs a practical approach to relationship weighting. The 60 million weighted relationships derive from:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">Frequency of appearance across multiple sources</li> <li class="whitespace-normal break-words">Editorial judgment about relevance and association strength</li> <li class="whitespace-normal break-words">Statistical significance from topic modeling</li> <li class="whitespace-normal break-words">Semantic proximity determined by language models</li> </ul> <p class="whitespace-pre-wrap break-words">The database operates through batch processing with targeted updates. A full rebuild takes approximately one week on consumer hardware. 
This approach prioritizes useful results over methodological complexity.</p> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Practical Applications</h2> <p class="whitespace-pre-wrap break-words">Linguabase powers two applications:</p> <h3 class="text-lg font-bold text-text-200 mt-1 -mb-1.5">In Other Words: Word Exploration Game</h3> <p class="whitespace-pre-wrap break-words">This interactive game allows players to navigate between concepts using meaningful connections. Players can find paths between seemingly unrelated words by traversing the weighted relationship network, demonstrating both the breadth and interconnectedness of language.</p> <h3 class="text-lg font-bold text-text-200 mt-1 -mb-1.5">Comprehensive Reference System</h3> <p class="whitespace-pre-wrap break-words">Our reference application extends traditional thesaurus functionality by:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">Providing relationships for concrete objects traditionally omitted from thesauri</li> <li class="whitespace-normal break-words">Showing multiple senses and contextual variations for each word</li> <li class="whitespace-normal break-words">Offering weighted connections that indicate relationship strength</li> <li class="whitespace-normal break-words">Including associative relationships beyond simple synonyms</li> </ul> <h2 class="text-xl font-bold text-text-200 mt-1 -mb-0.5">Practical Innovation</h2> <p class="whitespace-pre-wrap break-words">The value of Linguabase lies in its thorough integration of classical lexicography with modern language models. 
By combining multiple knowledge sources and enhancing them with advanced AI, we&#8217;ve created a resource that&#8217;s:</p> <ul class="[&amp;:not(:last-child)_ul]:pb-1 [&amp;:not(:last-child)_ol]:pb-1 list-disc space-y-1.5 pl-7"> <li class="whitespace-normal break-words">More comprehensive than traditional lexical databases</li> <li class="whitespace-normal break-words">More structured than raw large language model output</li> <li class="whitespace-normal break-words">Accessible for both specialized NLP applications and everyday language exploration</li> </ul> <p class="whitespace-pre-wrap break-words">This approach demonstrates how traditional linguistic resources can be augmented rather than replaced by large language models, creating a practical tool that advances the field of lexicography.</p> <hr /> <p class="whitespace-pre-wrap break-words"><em>The IDEA Linguabase was developed with support from the NSF-funded Extreme Science and Engineering Discovery Environment (XSEDE), grant #IRI130011.</em></p> </article> </section> <aside id="sidebar"> <section class="widget"> <article class="post"> <p>The Linguabase is available for licensing for use in apps, web sites, cloud services, and other applications that require a large thesaurus of weighted word relationships.</p> <div><a href="/contact/"> Contact us to discuss licensing <i class="icon-arrow-right-alt1"></i></a></div> </article> <article class="post"> <p>The Linguabase powers <a href="/language-apps/">our apps</a>.</p> </article> <article class="post"> <p> Travel through a galaxy of words in this matching game.</p> <div><a href="/otherwordly/"> Go to OtherWordly <i class="icon-arrow-right-alt1"></i></a></div> </article> <article class="post"> <p> Delve into words, definitions, and real-world usage examples.</p> <div><a href="/inotherwords/"> Go to In Other Words <i class="icon-arrow-right-alt1"></i></a></div> </article> </section> </aside> </div> </div> </div> </main> <aside class="items-holder"> <div 
class="container"> <div class="list-items widget-apps-widget"><h3>Read more about <br class="separator"><strong>our language-related apps.</strong></h3> <ul> <li> <div class="item-holder"> <a href="http://www.idea.org/otherwordly"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-35x36.png, https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-70x70.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-70x70.png, https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-140x140.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2023/09/otherwordly_appicon_1024px_rounded_01-70x70.png" alt=""> </picture> <span class="title" style="color:#c46e3c">OtherWordly</span> </a> </div> </li> <li> <div class="hr"></div> <div class="item-holder"> <a href="http://www.idea.org/inotherwords"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-35x36.png, https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-70x70.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-70x70.png, https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-140x140.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2023/09/inotherwords_appicon_1024px_rounded_01-70x70.png" alt=""> </picture> <span class="title" style="color:#8b8be0">In Other Words</span> </a> </div> </li> <li> <div class="hr"></div> <div class="item-holder"> <a 
href="http://www.idea.org/linguabase"> <picture> <!--[if IE 9]><video style="display: none;"><![endif]--> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-35x36.png, https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-70x70.png 2x" media="(max-width: 767px)"> <source srcset="https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-70x70.png, https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-140x140.png 2x"> <!--[if IE 9]></video><![endif]--> <img src="https://www.idea.org/blog/wp-content/uploads/2023/09/linguabase_appicon_1024px_rounded_01-70x70.png" alt=""> </picture> <span class="title" style="color:#b859cc">Linguabase</span> </a> </div> </li> </ul> </div> </div> </aside> <footer id="footer"> <div class="container"> <nav class="page-nav"> <ul> <li id="menu-item-4498" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-home menu-item-4498"><a href="https://www.idea.org/blog/">Home</a></li> <li id="menu-item-4497" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-4497"><a href="https://www.idea.org/blog/about/">Mission &#038; history</a></li> <li id="menu-item-4499" class="popup menu-item menu-item-type-custom menu-item-object-custom menu-item-4499"><a title="popup" class="js-btn-show-popups" href="#wrapper">Browse topics</a></li> <li id="menu-item-4666" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-4666"><a href="https://www.idea.org/blog/contact/">Contact IDEA</a></li> <li>Copyright &copy; 2025 <a href="https://www.idea.org/blog">IDEA</a></li> </ul> </nav> </div> </footer> </div> <link rel='stylesheet' id='yarppRelatedCss-css' href='//www.idea.org/blog/wp-content/plugins/yet-another-related-posts-plugin/style/related.css' type='text/css' media='all' /> <script type='text/javascript'> /* <![CDATA[ */ var wpcf7 = 
{"apiSettings":{"root":"https:\/\/www.idea.org\/blog\/wp-json\/contact-form-7\/v1","namespace":"contact-form-7\/v1"},"cached":"1"}; /* ]]> */ </script> <script type='text/javascript' src='//www.idea.org/blog/wp-content/plugins/contact-form-7/includes/js/scripts.js'></script> <script type='text/javascript' src='//www.idea.org/blog/wp-content/themes/ideaorg/js/jquery.main.js'></script> <script type='text/javascript' src='//www.idea.org/blog/wp-includes/js/wp-embed.min.js'></script> <!-- Google tag (gtag.js) --> <script async src="https://www.googletagmanager.com/gtag/js?id=G-Y68B45MT0H"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'G-Y68B45MT0H'); </script> </body> </html> <!-- Performance optimized by W3 Total Cache. Learn more: https://www.w3-edge.com/products/ Object Caching 1712/26 objects using disk Page Caching using disk: enhanced Database Caching using disk Served from: www.idea.org @ 2025-04-05 16:25:28 by W3 Total Cache -->

Pages: 1 2 3 4 5 6 7 8 9 10