CINXE.COM
OpenLink Virtuoso (Product Blog)
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:foaf="http://xmlns.com/foaf/0.1/"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
      xmlns:dct="http://purl.org/dc/terms/"
      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
      xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
      xmlns:sioct="http://rdfs.org/sioc/types#"
      xmlns:sioc="http://rdfs.org/sioc/ns#"
      xmlns:cert="http://www.w3.org/ns/auth/cert#">
<head profile="http://gmpg.org/xfn/11 http://purl.org/NET/erdf/profile http://internetalchemy.org/2003/02/profile http://www.w3.org/1999/xhtml/vocab#">
  <!-- Base stylesheet, icon, document base URL and character encoding. -->
  <link rel="stylesheet" href="/weblog/public/css/webdav.css" type="text/css" />
  <link rel="shortcut icon" href="/weblog/public/images/fav.ico"/>
  <base href="https://virtuoso.openlinksw.com/blog/index.vspx" /><!--[if IE]></base><![endif]-->
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <script type="text/javascript" src="/weblog/public/scripts/form.js"></script>
  <script type="text/javascript" src="/weblog/public/scripts/plugins.js"></script>
  <title>OpenLink Virtuoso (Product Blog)</title>

  <!-- Feed and metadata discovery links (RSS, RDF, Atom, pingback, FOAF, RSD, OPML, SIOC). -->
  <link rel="alternate" type="application/rss+xml" title="OpenLink Virtuoso (Product Blog) RSS" href="http://virtuoso.openlinksw.com/blog/gems/rss.xml" />
  <link rel="meta" type="application/rdf+xml" title="OpenLink Virtuoso (Product Blog) RDF" href="http://virtuoso.openlinksw.com/blog/gems/index.rdf" />
  <link rel="alternate" type="application/atom+xml" title="OpenLink Virtuoso (Product Blog) Atom" href="http://virtuoso.openlinksw.com/blog/gems/atom.xml" />
  <link rel="alternate" type="application/atomserv+xml" title="OpenLink Virtuoso (Product Blog) Atom" href="http://virtuoso.openlinksw.com/Atom/136/intro" />
  <link rel="pingback" href="http://virtuoso.openlinksw.com/mt-tb" />
  <link rel="meta" type="application/rdf+xml" title="FOAF" href="http://www.openlinksw.com/dataspace/organization/vdb/about.rdf" />
  <link rel="meta" type="application/rdf+xml" title="FOAF" href="http://virtuoso.openlinksw.com/blog/gems/foaf.xml" />
  <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://virtuoso.openlinksw.com/blog/gems/rsd.xml" />
  <link rel="subscriptions" type="text/x-opml" title="Blog Roll" href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=6" />
  <link rel="subscriptions" type="text/x-opml" title="Documentation (Atom Feed)" href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=25" />
  <link rel="subscriptions" type="text/x-opml" title="Documentation (RDF Feed)" href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=24" />
  <!-- The ampersand in the title is written as an entity so the attribute is well-formed. -->
  <link rel="subscriptions" type="text/x-opml" title="Online Demos &amp; Tutorials" href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=17" />
  <link rel="subscriptions" type="text/x-opml" title="Online Documentation" href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=16" />
  <link rel="subscriptions" type="text/x-opml" title="Support" href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=18" />
  <link rel="meta" type="application/rdf+xml" title="SIOC" href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/sioc.rdf" />

  <!-- Descriptive metadata. The keyword phrases carry literal double quotes, which must be
       written as &quot; inside a double-quoted attribute; left raw, the first quote ends the
       attribute and the value is read as empty. -->
  <meta name="owner" content="Virtuoso Data Space Bot" />
  <meta name="authors" content="Virtuoso Data Space Bot" />
  <meta name="description" content="A great place to track Virtuoso's rapid evolution." />
  <meta name="keywords" content="&quot;Linked, Data&quot;, &quot;Semantic, Web&quot;, &quot;Database&quot;, &quot;Benchmark&quot;, &quot;Column, Store&quot;, RDF, SPARQL, NewSQL, SQL, NoSQL, BigData" />
  <link href="/weblog/public/css/default.css" rel="stylesheet" type="text/css" media="screen" title="default" />

  <!-- Metadata schema declarations and the dc.* / geo values that use them. -->
  <link rel="schema.dc" href="http://purl.org/dc/elements/1.1/" />
  <link rel="schema.foaf" href="http://xmlns.com/foaf/0.1/" />
  <link rel="schema.rss" href="http://purl.org/rss/1.0/" />
  <link rel="schema.geo" href="http://www.w3.org/2003/01/geo/wgs84_pos#" />
  <link rel="schema.rdfs" href="http://www.w3.org/2000/01/rdf-schema#" />
  <meta name="dc.creator" content="Virtuoso Data Space Bot" />
  <meta name="dc.title" content="OpenLink Virtuoso (Product Blog)" />
  <meta name="dc.rights" content="OpenLink Software 1998-2006" />
  <meta name="geo.position" content="42.485836;-71.214287" />
  <meta name="ICBM" content="42.485836, -71.214287" />
  <!-- Query-string separators are escaped as &amp; so the href is a well-formed attribute value. -->
  <link rel="search" type="application/opensearchdescription+xml" title="OpenSearch Description" href="http://virtuoso.openlinksw.com/weblog/public/search.vspx?blogid=136&amp;type=text&amp;kwds=dir&amp;OpenSearch" />

  <script type="text/javascript">
    /* Globals set before loader.js is included below. */
    var toolkitPath="/ods/oat";
    var imagePath="/ods/images/oat/";
    var featureList=["ajax", "anchor", "ghostdrag", "dav"];
  </script>
  <script type="text/javascript" src="/ods/oat/loader.js"></script>
  <script type="text/javascript" src="/ods/js/app.js"></script>
  <script type="text/javascript">
    /* Attached to the OAT_LOAD message below: sets the OAT image paths and the anchor z-index. */
    function weblog2Init() {
      OAT.Preferences.imagePath = '/ods/images/oat/';
      OAT.Anchor.imagePath = OAT.Preferences.imagePath;
      OAT.Anchor.zIndex = 1001;
      /* The condition is the constant comparison 0 >= 1, so generateAPP is never called as emitted. */
      if (0 >= 1)
        generateAPP('texttd', { title:"Related links", width:300, height:200, appActivation:"click", useRDFB:true } );
    }
    OAT.MSG.attach(OAT, OAT.MSG.OAT_LOAD, weblog2Init);
  </script>
</head>
<body><script type="text/javascript" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:vm="http://www.openlinksw.com/vspx/weblog/"
xmlns:vdeps="http://www.openlinksw.com/vspx/deps/" xmlns:ods="http://www.openlinksw.com/vspx/ods/" xmlns:ev="http://www.w3.org/2001/xml-events">
  /* Submit the named form after pointing its hidden __submit_func field at `name`
     with the fixed value '__submit__'. */
  function doPost (frm_name, name) {
    var targetForm = document.forms[frm_name];
    var trigger = targetForm.__submit_func;
    trigger.value = '__submit__';
    trigger.name = name;
    targetForm.submit ();
  }

  /* Same as doPost, but the hidden field carries the caller-supplied `value`. */
  function doPostValue (frm_name, name, value) {
    var targetForm = document.forms[frm_name];
    var trigger = targetForm.__submit_func;
    trigger.value = value;
    trigger.name = name;
    targetForm.submit ();
  }
</script><script type="text/javascript" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:vm="http://www.openlinksw.com/vspx/weblog/" xmlns:vdeps="http://www.openlinksw.com/vspx/deps/" xmlns:ods="http://www.openlinksw.com/vspx/ods/" xmlns:ev="http://www.w3.org/2001/xml-events">
  /* Record the form name and the initiating control name in the hidden
     __event_target / __event_initiator fields, then submit the form. */
  function doAutoSubmit (frm, ctrl) {
    var eventTarget = frm.__event_target;
    var eventInitiator = frm.__event_initiator;
    eventTarget.value = frm.name;
    eventInitiator.value = ctrl.name;
    frm.submit ();
  }
</script><noscript xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:vm="http://www.openlinksw.com/vspx/weblog/" xmlns:vdeps="http://www.openlinksw.com/vspx/deps/" xmlns:ods="http://www.openlinksw.com/vspx/ods/" xmlns:ev="http://www.w3.org/2001/xml-events"> Your browser either does not support JavaScript or it is disabled in your browser's settings. Please consult your browser's documentation for information about enabling this feature. </noscript>
<div id="quickSub" style="position:absolute; visibility:hidden; z-index:1000;" onmouseout="return timeqs();" onmousemove="return delayqs();"></div>
<script type="text/javascript">
  /* On load, apply the style sheet named in the "style" cookie, falling back to
     getPreferredStyleSheet() when the cookie is absent. */
  window.onload = function (e) {
    var cookie = readCookie ("style");
    var title = cookie ?
cookie : getPreferredStyleSheet(); setActiveStyleSheet (title, 0); } </script><form name="page_form" method="post" action="/blog/index.vspx" accept-charset="utf-8" enctype="multipart/form-data" onsubmit="sflag=true;"> <input type="hidden" name="page_2586a52d45e2079afbb5169d58681d302_templates_openlink_index_vspx_view_state" value="wbwctQ1sb2dpbl9jb250cm9swbwEtRV2bF9sb2dvdXRfaW5fcHJvZ3Jlc3O8ALUQdmxfYXV0 aGVudGljYXRlZLwAtQx2X3NvcnRfb3JkZXK1BGRlc2O1BXBvc3RzwbwHtQK8CrUCvAC8Csy8 AbwBvAC1CnBvc3RzX29mZnO1ATC1DWNvbW1lbnRzX2xpc3TBvAC1BWNvb2sxvAG1CW5vdGlm eV9tZbwAtQhzZW1waW5nMbwAtQtzYWxtb25fcGluZ7wAtRVjb21tZW50MV9kaXNhYmxlX2h0 bWy8ALUFcG1vbjG1ATG1BXB5ZWFytQQyMDE2tQVzc3Jlc8G8B7UBzLUBzLwAzLwBvAG8ALVE cGFnZV8yNTg2YTUyZDQ1ZTIwNzlhZmJiNTE2OWQ1ODY4MWQzMDJfdGVtcGxhdGVzX29wZW5s aW5rX2luZGV4X3ZzcHjBvQAAAOq1DnBvc3RfdG9fcmVtb3ZlzLURcHJldmlld19wb3N0X21v ZGW8ALUGYmxvZ2lktQMxMza1B2luc3RfaWS8A7UJaW5zdF9uYW1ltRB2ZGIncyBCTE9HIFsx MzZdtQV0aXRsZbUgT3BlbkxpbmsgVmlydHVvc28gKFByb2R1Y3QgQmxvZym1EHBvc3RzX3Nv cnRfb3JkZXK1BGRlc2O1BHBhZ2W1ALUJdXNlcl9uYW1lzLUNZGVsX3VzZXJfbmFtZcy1B3Vz ZXJfaWS8ALUGZG9tYWlutRd2aXJ0dW9zby5vcGVubGlua3N3LmNvbbUOY3VycmVudF9kb21h aW61F3ZpcnR1b3NvLm9wZW5saW5rc3cuY29ttRBjdXJyZW50X3RlbXBsYXRltSEvREFWL1ZB RC9ibG9nMi90ZW1wbGF0ZXMvb3Blbmxpbmu1C2Jsb2dfYWNjZXNzvAO1DGN1cnJlbnRfaG9t ZbUPL2Jsb2cvdmRiL2Jsb2cvtQRob3N0tRd2aXJ0dW9zby5vcGVubGlua3N3LmNvbbUFY2hv c3S1End3dy5vcGVubGlua3N3LmNvbbUEYmFzZbUGL2Jsb2cvtQRob21ltQC1AnVytSRodHRw Oi8vdmlydHVvc28ub3Blbmxpbmtzdy5jb20vYmxvZy+1BWVtYWlstRZraWRlaGVuQG9wZW5s aW5rc3cuY29ttQZmZW1haWy1ALUIc3JjX3VyaTHMtQhmX2NhdF9pZMy1CmZfY2F0X25hbWW1 ALUFZGF0YTHMtQVwaW5nc7UAtQVocGFnZcy1BGNub3S8AbUHaXNfYmxvZ7wAtQlpbmNsdXNp b268AbUDdGl0tQC1A2F1dLUAtQRtYWlstQC1B3NyY190aXS1ALUHc3JjX3VyabUAtQNyc3O1 ALUJeGZuX3dvcmRztQC1B29sZF9yc3O1ALUHdXBkX3Blcsy1CHVwZF9mcmVxzLUEbGFuZ7UA tQZmb3JtYXS1ALUBeb0AAAfgtQFtvAG1AWS8C7UFYWRheXPMtQdmb3JkYXRl0ws66I9ZZL1D IAC1CWZvcmRhdGVfbrwBtQVkcHJldsy1BWRuZXh0zLUEY29webUbT3BlbkxpbmsgU29mdHdh 
cmUgMTk5OC0yMDA2tQRkaXNjtQC1BWFib3V0tTJBIGdyZWF0IHBsYWNlIHRvIHRyYWNrIFZp cnR1b3NvJ3MgcmFwaWQgZXZvbHV0aW9uLrUHc2VsX2NhdLUAtQZwb3N0aWTMtQRudWlkzLUC dHq8+7UEY29udLwBtQRjb21tvAG1CXRiX25vdGlmebwAtQNyZWfMtQRmaWx0tQkqZGVmYXVs dCq1CGVkaXRwb3N0zLUEbWJpZMy1BG1zZXTMtRJoYXZlX2NvbXVuaXR5X2Jsb2fMtQZjYXRf aWTMtQhwb3AzYWRkcrUAtQhwb3AzcG9ydLUDMTEwtQlwaW5nX2Rlc2PMtQhwaW5nX2VuZMy1 CnBpbmdfcHJvdG/MtQdwaW5nX2lkvP+1CXJpY2hfbW9kZbwBtQhhdXRvZGlzY8y1CGF1dG9m ZWVkzLUNcG9zdHNfdG9fc2hvd7wKtQdidG5fYm1rzLUJYmxvZ192aWV3vAC1C2NvbW1lbnRz X25vvAC1BXRiX25vvAC1DmNvbW1lbnRfZmlsdGVyzLUKbG9naW5fcGFyc7UAtQhlZGl0X3Rh Z8y1FWNvbW1lbnRfdnJmeV9vbGRfcmVzcMy1EWNvbW1lbnRfdnJmeV9yZXNwvBm1BGZ0dHHM tQljb250X2VkaXTMtQVjYXRpZMy1BXRhZ2lkzLUNc3RvY2tfaW1nX2xvY7UWL3dlYmxvZy9w dWJsaWMvaW1hZ2VzL7UOY3VzdG9tX2ltZ19sb2O1Fi93ZWJsb2cvcHVibGljL2ltYWdlcy+1 CmN1c3RvbV9yc3PBvAC1CGFyY2hfc2VszLUJYXJjaF92aWV3tQVtb250aLUOc2hvd19mdWxs X3Bvc3S8ALUSc2hvd19jb21tZW50X2lucHV0vAC1DHJldHVybl91cmxfMbUWL2Jsb2cvaW5k ZXgudnNweD9wYWdlPbUEdGVtcMy1CHRlbXBsYXRlzLUIY29tbV9yZWbMtQp0b19yZXN0b3Jl tQC1D29kc2Jhcl9hcHBfdHlwZcy1FG9kc2Jhcl9hcHBfZGF0YXNwYWNlzLUTb2RzYmFyX2Fw cGluc3RfdHlwZcy1DG9kc2Jhcl9mbmFtZcy1DW9kc2Jhcl91X2hvbWW1ALUSb2RzYmFyX2xv Z2lucGFyYW1ztQC1Em9kc2Jhcl9zaG93X3NpZ25pbrUEdHJ1ZbUQb2RzYmFyX29kc19ncGF0 aLUkaHR0cHM6Ly92aXJ0dW9zby5vcGVubGlua3N3LmNvbS9vZHMvtRVvZHNiYXJfZGF0YXNw YWNlX3BhdGi1Kmh0dHBzOi8vdmlydHVvc28ub3Blbmxpbmtzdy5jb20vZGF0YXNwYWNlL7UQ b2RzYmFyX2lub3V0X2FycrUAtRJvZHNiYXJfY3VycmVudF91cmy1Bi9ibG9nL7UIY29tbWVu dDK1ALUFcmVhbG3M" />
<input type="hidden" name="nonce" value="c18a30a7250521eeab84169bf86613cc" />
<input type="hidden" name="__submit_func" value="" />
<input type="hidden" name="__event_target" value="" />
<input type="hidden" name="__event_initiator" value="" />
<input type="hidden" name="page" value="" />
<div id="odsBarCss" style="display:none"> </div><script type="text/javascript">
<!--
/* ODS bar bootstrap.
   Every statement sits on its own line and remarks use block comments: a line
   comment (or the opening comment marker above) sharing a line with code hides
   everything that follows it on that line. */

/* Look through the direct children of <head> for an already linked ds-bar.css. */
var _head = document.getElementsByTagName('head')[0];
var odsbarCSSloaded = 0;
for (var i = 0; i < _head.childNodes.length; i++) {
  if (typeof(_head.childNodes[i].href) !== 'undefined') {
    if (_head.childNodes[i].href.indexOf('ds-bar.css') > 0) {
      odsbarCSSloaded = 1;
      break;
    }
  }
}

/* Inject CSS into <head>.
   cssContainer: id of an element whose innerHTML holds CSS text. When no such
   element exists, or it is empty, the same string is used as the href of a
   <link rel="stylesheet"> instead. */
function loadCSS(cssContainer) {
  var _head = document.getElementsByTagName("head")[0];
  var cssObj = document.getElementById(cssContainer);
  var cssUrl = '';
  if (typeof(cssContainer)!='undefined' && cssContainer.length)
    cssUrl = cssContainer;
  /* getElementById returns null for an unknown id; test the element itself
     rather than comparing it with the string 'undefined', so a missing
     container falls through to the <link> branch instead of throwing. */
  if (cssObj && cssObj.innerHTML.length) {
    var cssNode = document.createElement('style');
    cssNode.type = 'text/css';
    if (cssNode.styleSheet) { /* IE */
      cssNode.styleSheet.cssText = cssObj.innerHTML;
    } else {
      cssNode.textContent = cssObj.innerHTML;
    }
    _head.appendChild(cssNode);
  } else if(cssUrl.length) {
    var cssNode = document.createElement('link');
    cssNode.type = 'text/css';
    cssNode.rel = 'stylesheet';
    cssNode.href = cssUrl;
    _head.appendChild(cssNode);
  }
  return;
}
if (odsbarCSSloaded == 0)
  loadCSS('odsBarCss');

/* Callbacks pushed here are run by init(). */
var ODSInitArray = new Array();
window._apiKey=''; /* Google maps key needed before OAT load */
window.YMAPPID =''; /* Yahoo maps key needed before OAT load */

/* If OAT is not defined yet, set its configuration globals and append loader.js to <head>. */
if (typeof (OAT) == 'undefined') {
  var toolkitPath = "https://virtuoso.openlinksw.com/ods/oat";
  var toolkitImagesPath = "https://virtuoso.openlinksw.com/ods/images/oat/";
  var featureList = [];
  var script = document.createElement("script");
  script.src = 'https://virtuoso.openlinksw.com/ods/oat/loader.js';
  _head.appendChild(script);
}

/* Set the OAT image and style paths, request the ajax/xml/dialog features, then
   run every callback queued in ODSInitArray. A failing callback is reported with
   alert() and does not stop the remaining ones. */
function init() {
  OAT.Preferences.imagePath="/ods/images/oat/";
  OAT.Preferences.stylePath="/ods/oat/styles/";
  OAT.Loader.load(["ajax", "xml", "dialog"], function(){});
  if (typeof ODSInitArray !== 'undefined') {
    for (var i = 0; i < ODSInitArray.length; i++) {
      try {
        ODSInitArray[i]();
      } catch (err) {
        /* A thrown value need not be an Error; fall back to the value itself
           so building the message cannot throw inside the handler. */
        var detail = (err && err.message) ? err.message : err;
        alert ('Error in function call: ' + detail); /* XXX add error logging */
      }
    }
  }
}

/* Key handler: on Enter (key code 13), post the form that owns `fld` with `btn`
   as the submit field name and return false; otherwise return true.
   NOTE(review): a later script on this page declares another global
   submitenter(myfield, e); once that script has run it replaces this one. */
function submitenter(fld, btn, e) {
  var keycode;
  if (fld == null || fld.form == null)
    return true;
  if (window.event)
    keycode = window.event.keyCode;
  else if (e)
    keycode = e.which;
  else
    return true;
  if (keycode == 13) {
    doPost (fld.form.name, btn);
    return false;
  }
  return true;
}

/* Key handler for the ODS bar search box: on Enter, navigate to the ODS search
   page with the typed text as `q` and return false; otherwise return true. */
function getUrlOnEnter(e) {
  var keycode;
  if (window.event)
    keycode = window.event.keyCode;
  else if (e)
    keycode = e.which;
  else
    return true;
  if (keycode == 13) {
    /* Encode the user text so characters such as '&', '#' or '+' stay inside
       the q parameter instead of altering the query string. */
    document.location.href = 'https://virtuoso.openlinksw.com/ods/search.vspx?q=' + encodeURIComponent($('odsbar_search_text').value) + '&ontype=WEBLOG2';
    return false;
  }
  return true;
}

/* When the page is neither framed nor already on https, request the server's
   sslPort and, if one is reported and no link exists yet, append a lock-icon
   link to the https form of the current URL inside #span_ssl_link.
   `inFrame` is a global assigned by a later script on this page. */
function showSSLLink() {
  if (inFrame)
    return;
  if (document.location.protocol == 'https:')
    return;
  var x = function(data) {
    var o = null;
    try {
      o = OAT.JSON.parse(data);
    } catch (e) {
      o = null;
    }
    if (o && o.sslPort && !$('a_ssl_link')) {
      var href = 'https://' + document.location.hostname + ((o.sslPort != '443')? ':'+o.sslPort: '') + document.location.pathname + document.location.search + document.location.hash;
      var a = OAT.Dom.create("a");
      a.id = 'a_ssl_link';
      a.href = href;
      var img = OAT.Dom.create('img');
      img.src = '/ods/images/icons/lock_16.png';
      img.alt = 'ODS SSL Link';
      a.appendChild(img);
      $('span_ssl_link').appendChild(a);
    }
  }
  OAT.AJAX.GET('/ods/api/server.getInfo?info=sslPort', false, x, {onstart: function(){}, onend: function(){}});
}
//-->
</script>
<div id="ods_bar_loading" style="background-color:#DDEFF9;height: 62px;padding:5px 0px 0px 5px;display:none;"><img src="/ods/images/oat/Ajax_throbber.gif" alt="loading..." /><span> Loading...
please wait.</span>
</div>
<!-- Sign-in strip; hidden by default. -->
<div id="ods_bar_odslogin" style="display:none;text-align:right"><a href="https://virtuoso.openlinksw.com/ods/login.vspx?URL=%2Fblog%2F">Sign In</a> | Sign Up </div>
<!-- Full ODS navigation bar; hidden by default. -->
<div id="HD_ODS_BAR" style="display:none;">
  <div id="ods_bar">
    <div id="ods_bar_content">
      <div id="ods_bar_top">
        <!-- First level: one entry per ODS application, plus the search box. -->
        <ul id="ods_bar_first_lvl">
          <!-- The logo is the only content of its link, so it carries an alt text. -->
          <li class="home_lnk"> <a href="https://virtuoso.openlinksw.com/ods/sfront.vspx" ><img src="https://virtuoso.openlinksw.com/ods/images/odslogosml.png" border="0" alt="ODS Home" title="ODS Home" /></a> </li>
          <li class=""><a href="https://virtuoso.openlinksw.com/dataspace/all/community">Community</a> </li>
          <li class=""><a href="https://virtuoso.openlinksw.com/dataspace/all/briefcase">Briefcase</a> </li>
          <li class="sel"><a href="https://virtuoso.openlinksw.com/dataspace/all/weblog">Weblog</a> </li>
          <li class=""><a href="https://virtuoso.openlinksw.com/dataspace/all/wiki">Wiki</a> </li>
          <li class=""><a href="https://virtuoso.openlinksw.com/dataspace/all/bookmark">Bookmarks</a> </li>
          <li class=""><a href="https://virtuoso.openlinksw.com/ods/gtags.vspx">Tags</a> </li>
          <li class=""> <a href="https://virtuoso.openlinksw.com/ods/search.vspx?ontype=WEBLOG2"><img class="tab_img" src="https://virtuoso.openlinksw.com/ods/images/search.png" alt="search icon" /> </a> </li>
          <li><input type="text" name="odsbar_search_text" value="" size="10" id="odsbar_search_text" class="textbox" onkeypress="return getUrlOnEnter(event)" /> </li>
        </ul>
        <!-- Account and help commands. The Help link opens a new window, so it is marked rel="noopener". -->
        <div id="ods_bar_top_cmds"> <a href="https://virtuoso.openlinksw.com/ods/login.vspx?URL=%2Fblog%2F">Sign In</a> | Sign Up | <a href="https://virtuoso.openlinksw.com/ods/help.vspx" target="_blank" rel="noopener">Help</a> | <span onclick="javascript: ODSDIALOG.aboutDialog('ODS'); return false;" title="About" style="cursor: pointer;">About</span><span id="span_ssl_link"> </span><script type="text/javascript">
          /* Queue the SSL link check; it runs after the ajax and json features have loaded. */
          ODSInitArray.push(function(){OAT.Loader.load(["ajax", "json"], function(){showSSLLink();});});
        </script> </div>
      </div>
      <ul id="ods_bar_second_lvl">
<li> <a href="https://virtuoso.openlinksw.com/dataspace/dav/weblog/Comunity%20blog">Comunity blog </a> </li>
<li> <a href="https://virtuoso.openlinksw.com/dataspace/kidehen@openlinksw.com/weblog/kidehen@openlinksw.com%27s%20BLOG%20%5B127%5D">kidehen@openlinksw.com's BLOG [127] </a> </li>
<li> <a href="https://virtuoso.openlinksw.com/dataspace/uda/weblog/uda%27s%20BLOG%20%5B135%5D">uda's BLOG [135] </a> </li>
<li> <a href="https://virtuoso.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D">vdb's BLOG [136] </a> </li>
<li><a href="https://virtuoso.openlinksw.com/ods/search.vspx?apps=apps&amp;q=Weblog">more...</a> </li><li> </li>
</ul>
</div>
<!-- Bottom row of the ODS bar: breadcrumb and data space indicator.
     Ampersands and the '>' separator are written as entities. -->
<div id="ods_bar_bot">
  <div id="ods_bar_home_path"><a href="/ods/?sid=&amp;realm=wa">ODS Home</a> &gt; Weblog </div>
  <div id="ods_bar_data_space_indicator"> </div>
</div>
</div>
</div><p style="font-size: 1pt;margin: 0;padding: 0;" id="ods_bar_sep"> </p><script type="text/javascript">
<!--
/* ODS bar visibility logic.
   Every statement sits on its own line and remarks use block comments, so no
   comment can hide code that shares its line. */

/* State flags as emitted by the server for this request. */
var userIsLogged;
userIsLogged = 0;
var notLoggedShowSignIn;
notLoggedShowSignIn = 0;
/* Declaration and assignment are separate statements; the terminator between them is required. */
var notLoggedShowOdsBar;
notLoggedShowOdsBar = 0;

/* IE only: for every PNG <img> below parent_elm, swap the source for a spacer
   image and render the original through the AlphaImageLoader filter at the
   original image's dimensions. Returns immediately in other browsers. */
function applyTransparentImg(parent_elm) {
  if (!OAT.Browser.isIE)
    return;
  var img_elements = parent_elm.getElementsByTagName('IMG');
  for (var i = 0; i < img_elements.length; i++) {
    var img_elm = img_elements[i];
    var path = img_elm.src;
    if (img_elements[i].src.toLowerCase().indexOf(".png") > 0) {
      var tmp_img_obj = document.createElement("img");
      tmp_img_obj.src = img_elm.src;
      img_elm.src = 'https://virtuoso.openlinksw.com/ods/images/odsbar_spacer.png';
      img_elm.style.height = tmp_img_obj.height;
      img_elm.style.width = tmp_img_obj.width;
      img_elm.style.filter = "progid:DXImageTransform.Microsoft.AlphaImageLoader(src='"+path+"', sizingMethod='scale')";
    }
  }
}

/* Write cookie name=value for path "/". When `days` is truthy the cookie gets
   an expiry that many days from now; otherwise no expires attribute is set. */
function create_cookie (name, value, days) {
  if (days) {
    var date = new Date ();
    date.setTime (date.getTime () + (days*24*60*60*1000));
    /* toUTCString is the standard name for the deprecated toGMTString alias. */
    var expires = "; expires=" + date.toUTCString ();
  } else
    var expires = "";
  document.cookie = name + "=" + value + expires + "; path=/";
}

/* Return the value of cookie `name` from document.cookie, or null when absent. */
function read_cookie (name) {
  var name_eq = name + "=";
  var ca = document.cookie.split (';');
  for (var i=0; i < ca.length; i++) {
    var c = ca[i];
    while (c.charAt (0) == ' ')
      c = c.substring (1, c.length);
    if (c.indexOf (name_eq) == 0)
      return c.substring (name_eq.length, c.length);
  }
  return null;
}

var OATWaitCount = 0;
var inFrame=0;

/* Top-level window: record the interface cookie for one day.
   Framed: set inFrame and queue hiding of the element with id 'FT'. */
if (window.top === window.self) {
  create_cookie ('interface', 'vspx', 1);
} else {
  inFrame = 1;
  ODSInitArray.push(function(){OAT.Dom.hide('FT');});
}

/* Show the full bar or the sign-in strip according to the flags above. Does
   nothing when framed. While OAT is still undefined it re-schedules itself
   every 200 ms, giving up once OATWaitCount exceeds 100. */
function odsbarSafeInit() {
  if (inFrame)
    return;
  if (typeof (OAT) !== 'undefined') {
    /* ods_bar_state_set (read_cookie ('odsbar_state')); */
    if (userIsLogged || notLoggedShowOdsBar) {
      applyTransparentImg (document.getElementById ('ods_bar'));
      OAT.Dom.show('HD_ODS_BAR');
    } else if (notLoggedShowSignIn != 0) {
      OAT.Dom.show('ods_bar_odslogin');
    }
  } else {
    OATWaitCount++;
    if (OATWaitCount <= 100)
      setTimeout(odsbarSafeInit, 200);
  }
}
odsbarSafeInit();
//-->
</script>
<!-- Page container; the first row holds the blog title, login status and navigation. -->
<table id="pagecontainer" cellspacing="0" cellpadding="0" width="100%" typeof="sioct:Weblog" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D">
<tr>
<td colspan="3">
<table id="header" cellpadding="0" cellspacing="0">
<tr>
<td>
<h1><a href="index.vspx" class="title_link" property="dc:title">OpenLink Virtuoso (Product Blog)</a></h1>
</td>
<td class="right">
<div id="login-info-ctr"><img class="title_icon" alt="Not logged in" title="Not logged in" src="/weblog/public/images/lock_16.png" />You are not logged in </div>
<div class="login_link"><a class="button" href="index.vspx?page=login">Login</a> </div>
<div id="login_btn"> </div>
</td>
</tr>
<tr>
<td>
<h2> SQL, SPARQL, RDF, XQuery, XPath, XSLT, XML, and more..
</h2>
</td>
<td class="right">
<!-- Blog view switcher followed by the quick search box. -->
<a href="index.vspx" class="blog_selected">Weblog</a> | <a href="index.vspx?page=linkblog" class="">LinkBlog</a> | <a href="index.vspx?page=summary" class="">Summary</a> | <a href="index.vspx?page=archive" class="">Archive</a> | <a href="/weblog/public/search.vspx?blogid=136">Search</a> <script type="text/javascript">
<!--
/* Key handler for the quick search box: on Enter (key code 13) post page_form
   with 'GO' as the submit field name and return false; otherwise return true.
   Statements sit on separate lines and the disabled call is a block comment, so
   it cannot hide the doPost call that follows it.
   NOTE(review): an earlier script on this page declares submitenter(fld, btn, e);
   this later declaration replaces it, so three-argument callers (none visible in
   this part of the page) would bind `btn` to the `e` parameter. */
function submitenter(myfield,e) {
  var keycode;
  if (window.event)
    keycode = window.event.keyCode;
  else if (e)
    keycode = e.which;
  else
    return true;
  if (keycode == 13) {
    /* myfield.form.submit(); */
    doPost ('page_form', 'GO');
    return false;
  } else
    return true;
}
//-->
</script> <input type="text" name="txt" value="" size="10" class="textbox" onkeypress="return submitenter(this,event)" /><a href="javascript:void(0)" onclick="javascript: doPost ('page_form', 'GO'); return false"><img src="/weblog/public/images/go_16.png" border="0" id="search_button" title="Search" alt="Search" /></a>
</td>
</tr>
</table>
<!-- In-page jump links to the entries listed on this page. -->
<div id="navbartop">
<div>Entries: [ <a href="#1867">1</a> | <a href="#1865">2</a> | <a href="#1863">3</a> | <a href="#1861">4</a> | <a href="#1859">5</a> | <a href="#1857">6</a> | <a href="#1855">7</a> | <a href="#1853">8</a> | <a href="#1850">9</a> | <a href="#1846">10</a> ]</div>
</div>
<div id="error"> </div>
</td>
</tr>
<tr>
<td class="box" id="left">
<div class="box">
<h2>Details</h2>
<div class="roll">
<!-- Blog owner profile, annotated with FOAF terms. -->
<div typeof="foaf:Person" about="http://www.openlinksw.com/dataspace/organization/vdb#this">
<div> </div>
<div property="foaf:name"> Virtuoso Data Space Bot </div>
<div> Burlington, United States </div>
<div> <a rel="foaf:mbox" href="mailto:kidehen@openlinksw.com"><img border="0" alt="E-mail" src="/weblog/public/images/email2.gif" /> </a> </div>
<div> </div>
<div> </div>
<div><a href="http://www.openlinksw.com/dataspace/organization/vdb/about.rdf" class="ods-foaf-link"><img border="0" alt="FOAF" title="FOAF" hspace="3" src="/weblog/public/images/foaf.png" />FOAF</a> </div>
<div><a
href="http://www.openlinksw.com/dataspace/organization/vdb" rel="rdfs:seeAlso">Full profile</a> </div></div> </div> <h2>Subscribe</h2> <div class="roll"> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/atom.xml" class="atom-link"><img border="0" alt="ATOM" title="ATOM" hspace="3" src="/weblog/public/images/atom-icon-16.gif" />Atom 1.0 </a></div> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/rss.xml" class="rss-link"><img border="0" alt="RSS" title="RSS" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />RSS 2.0 </a></div> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/rss-usm.xml" class="rss-usm-link"><img border="0" alt="RSS (USM)" title="RSS (USM)" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />RSS (USM) 2.0 </a></div> <div><a href="http://virtuoso.openlinksw.com/blog/gems/index.rdf" class="rdf-link"><img border="0" alt="RDF" title="RDF" hspace="3" src="/weblog/public/images/rdf-icon-16.gif" />RDF RSS 1.0</a></div> <div><a href="http://virtuoso.openlinksw.com/blog/gems/index.ocs" class="ocs-link"><img border="0" alt="OCS" title="OCS" hspace="3" src="/weblog/public/images/blue-icon-16.gif" />OCS 0.5</a> </div> <div><a href="http://virtuoso.openlinksw.com/blog/gems/index.opml" class="opml-link"><img border="0" alt="OPML" title="OPML" hspace="3" src="/weblog/public/images/blue-icon-16.gif" />OPML 1.0</a> </div> <div><a href="http://virtuoso.openlinksw.com/blog/gems/xbel.xml?:from=2016-01-11&:to=2016-01-12" class="xbel-link"><img border="0" alt="XBEL" title="XBEL" hspace="3" src="/weblog/public/images/blue-icon-16.gif" />XBEL</a> </div> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/podcasts.xml" class="podcast-link"><img border="0" alt="Podcasts" title="Podcasts" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />Multimedia </a> </div> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/podcasts.xml?:media=video" class="podcast-link"><img border="0" alt="Podcasts" title="Podcasts" hspace="3" 
src="/weblog/public/images/rss-icon-16.gif" />Videos </a> </div> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/podcasts.xml?:media=audio" class="podcast-link"><img border="0" alt="Podcasts" title="Podcasts" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />Audio </a> </div> <div> <a href="http://virtuoso.openlinksw.com/blog/gems/podcasts.xml?:media=image" class="podcast-link"><img border="0" alt="Podcasts" title="Podcasts" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />Images </a> </div> <div><a href="pcast://virtuoso.openlinksw.com/blog/gems/podcasts.xml" class="itunes-link"><img border="0" alt="iTunes" title="iTunes" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />iTunes Subscription</a> </div> <div><a href="http://virtuoso.openlinksw.com/blog/gems/mrss.xml" class="mrss-link"><img border="0" alt="Podcasts" title="Podcasts" hspace="3" src="/weblog/public/images/rss-icon-16.gif" />Media RSS (Yahoo!)</a></div> <div><a href="http://virtuoso.openlinksw.com/GData/136" class="gdata-link"><img border="0" alt="GData" title="GData" hspace="3" src="/weblog/public/images/blue-icon-16.gif" />GData</a></div> <div><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/sioc.rdf" class="sioc-link"><img border="0" alt="SIOC" title="SIOC" hspace="3" src="/weblog/public/images/rdf-icon-16.gif" />SIOC (RDF/XML)</a></div> <div><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/sioc.ttl" class="sioc-link"><img border="0" alt="SIOC" title="SIOC" hspace="3" src="/weblog/public/images/rdf-icon-16.gif" />SIOC (N3/Turtle)</a></div> </div> <div class="box"> <h2>Tag Cloud </h2> <div id="tags_cloud"><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/.net"><span style="font-size: 14px;">.net</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Clustering"><span style="font-size: 12px;">Clustering</span></a> <a 
href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Database"><span style="font-size: 12px;">Database</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Databases"><span style="font-size: 12px;">Databases</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/OpenLink"><span style="font-size: 12px;">OpenLink</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Programming"><span style="font-size: 12px;">Programming</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/RDBMS"><span style="font-size: 12px;">RDBMS</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/RDF"><span style="font-size: 12px;">RDF</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/SPARQL"><span style="font-size: 12px;">SPARQL</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/SQL"><span style="font-size: 12px;">SQL</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Scalability"><span style="font-size: 12px;">Scalability</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Semantic Web"><span style="font-size: 12px;">Semantic Web</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/Virtuoso"><span style="font-size: 12px;">Virtuoso</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/ajax"><span style="font-size: 12px;">ajax</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/architecture"><span style="font-size: 12px;">architecture</span></a> <a 
href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/atom"><span style="font-size: 12px;">atom</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/benchmarking"><span style="font-size: 19px; color: #CC3333;">benchmarking</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/cluster"><span style="font-size: 19px; color: #CC3333;">cluster</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/database"><span style="font-size: 27px; font-weight: bold; color: #9900CC;">database</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/databases"><span style="font-size: 27px; font-weight: bold; color: #9900CC;">databases</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/dataspace"><span style="font-size: 16px; color: #66CC99;">dataspace</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/db2"><span style="font-size: 12px;">db2</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/dynamic_languages"><span style="font-size: 12px;">dynamic_languages</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/entity_sql"><span style="font-size: 12px;">entity_sql</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/foaf"><span style="font-size: 13px;">foaf</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/history"><span style="font-size: 13px;">history</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/howto"><span style="font-size: 14px;">howto</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/hpc"><span 
style="font-size: 12px;">hpc</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/identity_20"><span style="font-size: 12px;">identity_20</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/infomania"><span style="font-size: 12px;">infomania</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/informix"><span style="font-size: 12px;">informix</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/ingres"><span style="font-size: 12px;">ingres</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/javascript"><span style="font-size: 12px;">javascript</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/jdbc"><span style="font-size: 14px;">jdbc</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/linq"><span style="font-size: 12px;">linq</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/linqtordf linq semantic web .net"><span style="font-size: 12px;">linqtordf linq semantic web .net</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/linux"><span style="font-size: 13px;">linux</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/lubm"><span style="font-size: 14px;">lubm</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/macosx"><span style="font-size: 12px;">macosx</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/mysql"><span style="font-size: 12px;">mysql</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/oat"><span style="font-size: 12px;">oat</span></a> <a 
href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/odbc"><span style="font-size: 13px;">odbc</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/ods"><span style="font-size: 13px;">ods</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/oledb"><span style="font-size: 12px;">oledb</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/open-source"><span style="font-size: 12px;">open-source</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/openid"><span style="font-size: 12px;">openid</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/openlink"><span style="font-size: 15px;">openlink</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/oracle"><span style="font-size: 14px;">oracle</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/postgres"><span style="font-size: 12px;">postgres</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/rdf"><span style="font-size: 26px; font-weight: bold; color: #9900CC;">rdf</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/ruby"><span style="font-size: 12px;">ruby</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/scalability"><span style="font-size: 17px; color: #66CC99;">scalability</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/sem web"><span style="font-size: 12px;">sem web</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/semantic web"><span style="font-size: 12px;">semantic web</span></a> <a 
href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/semanticweb"><span style="font-size: 30px; font-weight: bold; color: #9900CC;">semanticweb</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/sioc"><span style="font-size: 13px;">sioc</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/skos"><span style="font-size: 12px;">skos</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/socialnetworking"><span style="font-size: 13px;">socialnetworking</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/sparql"><span style="font-size: 23px; font-weight: bold; color: #339933;">sparql</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/sql"><span style="font-size: 14px;">sql</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/sql_server"><span style="font-size: 12px;">sql_server</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/unified_storage"><span style="font-size: 12px;">unified_storage</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/unix"><span style="font-size: 12px;">unix</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/venture_capital"><span style="font-size: 12px;">venture_capital</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/virtual_database"><span style="font-size: 12px;">virtual_database</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/virtuoso"><span style="font-size: 26px; font-weight: bold; color: #9900CC;">virtuoso</span></a> <a 
href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/visionary"><span style="font-size: 12px;">visionary</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/web2.0"><span style="font-size: 13px;">web2.0</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/web20"><span style="font-size: 13px;">web20</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/web30"><span style="font-size: 14px;">web30</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/webservices"><span style="font-size: 14px;">webservices</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/windows"><span style="font-size: 12px;">windows</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/xml"><span style="font-size: 13px;">xml</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/xpath"><span style="font-size: 12px;">xpath</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/xquery"><span style="font-size: 12px;">xquery</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/xslt"><span style="font-size: 12px;">xslt</span></a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/tag/zigzag"><span style="font-size: 12px;">zigzag</span></a> </div> </div> <div class="roll"> </div> <h2>Post Categories</h2> <div class="roll"> <div class="roll"> <div><a href="index.vspx?page="> ALL </a> </div> <div> <a href="gems/rss_cat.xml?:cid=2&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=2">Enterprise Application Integration </a> </div> <div> 
<a href="gems/rss_cat.xml?:cid=1&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=1">Enterprise Information Integration </a> </div> <div> <a href="gems/rss_cat.xml?:cid=7&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=7">HTTP & WebDAV </a> </div> <div> <a href="gems/rss_cat.xml?:cid=8&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=8">SQL Database </a> </div> <div> <a href="gems/rss_cat.xml?:cid=9&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=9">SQL/XML (SQLX) </a> </div> <div> <a href="gems/rss_cat.xml?:cid=6&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=6">Universal Server </a> </div> <div> <a href="gems/rss_cat.xml?:cid=0&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=0">Virtual Database Technology </a> </div> <div> <a href="gems/rss_cat.xml?:cid=4&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=4">Web Services Platform </a> </div> <div> <a href="gems/rss_cat.xml?:cid=10&:bid=136" class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=10">Weblog Technology </a> </div> <div> <a href="gems/rss_cat.xml?:cid=3&:bid=136" 
class="inlinelink"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a class="inlinelink" href="index.vspx?page=&cat=3">XML Database (XSL-T, XPath, XQuery, and XML Schema) </a> </div></div> </div> <h2>Recent Articles</h2> <div class="roll"> <ul class="last-messages"> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1867" rel="sioc:container_of">New Semantic Publishing Benchmark Record</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1865" rel="sioc:container_of">DBpedia Usage Report, August 2015</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1863" rel="sioc:container_of">Big Data, Part 2: Virtuoso Meets Impala</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1861" rel="sioc:container_of">Vectored Execution in Column/Row Stores</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1859" rel="sioc:container_of">Virtuoso at SIGMOD 2015</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1857" rel="sioc:container_of">Big Data, Part 1: Virtuoso Meets Hive</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1855" rel="sioc:container_of">Rethink Big and Europe's Position in Big Data</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1853" rel="sioc:container_of">Virtuoso updated to version 7.2.1 </a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1850" rel="sioc:container_of">Virtuoso Elastic Cluster Benchmarks AMI on Amazon EC2</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1846" rel="sioc:container_of">In Hoc Signo Vinces (part 21 of n): Running TPC-H on Virtuoso Elastic Cluster 
on Amazon EC2</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1844" rel="sioc:container_of">Introducing the OpenLink Virtuoso Benchmarks AMI on Amazon EC2</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1842" rel="sioc:container_of">SNB Interactive, Part 3: Choke Points and Initial Run on Virtuoso</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1839" rel="sioc:container_of">The Virtuoso Science Library</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1837" rel="sioc:container_of">SNB Interactive, Part 2: Modeling Choices</a> </li> <li><a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1835" rel="sioc:container_of">SNB Interactive, Part 1: What is SNB Interactive Really About?</a> </li></ul> </div> <h2>Display Settings</h2> <div class="roll"> <div> <input type="text" name="v_n_blog_rows" value="10" size="6" class="textbox" /> articles per page. </div><div> <select name="v_sort_order" class="select"><option value="desc" selected="selected">descending</option><option value="asc" >ascending</option></select> order. 
</div> <div> <input type="submit" name="view_changes_post" value="Set" class="real_button" title="Set" alt="Set" /> <input type="submit" name="view_changes_reset" value="Reset" class="real_button" title="Reset" alt="Reset" /> </div> </div> </div> </td> <td id="texttd"> <div id="post-1867"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1867"> <div class="post-title"> <span class="dc-title" property="dc:title">New Semantic Publishing Benchmark Record</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2016-01-11#1867" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2016-01-11#1867" dc:title="New Semantic Publishing Benchmark Record" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1867"></rdf:Description></rdf:RDF> --> <p>There is a new <a href="http://ldbcouncil.org/developer/spb" id="link-id0x7fd87214c348">SPB (Semantic Publishing Benchmark)</a> 256 Mtriple record with <a href="http://dbpedia.org/resource/Virtuoso_Universal_Server" id="link-id0x7fd870d26a48">Virtuoso</a>.</p> <p>As before, the result has been measured with the <a href="https://github.com/v7fasttrack/virtuoso-opensource/tree/feature/analytics" id="link-id0x7fd870d26be8">feature/analytics</a> branch of the <a href="https://github.com/v7fasttrack/virtuoso-opensource/" id="link-id0x7fd870d26d58">v7fasttrack</a> open source distribution, and it will soon be available as a preconfigured Amazon EC2 image. 
The updated <a href="http://www.openlinksw.com/weblogs/oerling/?id=1849" id="link-id0x7fd87217aec8">benchmarks AMI</a> with this version of the software will be out there within the next week, to be announced on this blog.</p> <h2>On the Cost of RDF Query Optimization</h2> <p>RDF query optimization is harder than the relational equivalent; first, because there are more joins, hence an NP complete explosion of plan search space, and second, because cardinality estimation is harder and usually less reliable. The work on characteristic sets, pioneered by <a href="https://www.linkedin.com/pub/thomas-neumann/2/35/189" id="link-id0x7fd870c3e918">Thomas Neumann</a> in <a href="https://domino.mpi-inf.mpg.de/intranet/ag5/ag5publ.nsf/0/AD3DBAFA6FB90DD2C1257593002FF3DF/$file/rdf3x.pdf" id="link-id0x7fd870c3eae8">RDF3X</a>, uses regularities in structure for treating properties usually occurring in the same subject as columns of a table. The same idea is applied for tuning physical representation in <a href="http://homepages.cwi.nl/~duc/papers/emergentschema_www15.pdf" id="link-id0x7fd8721c1ea8">the joint Virtuoso / MonetDB work published at WWW 2015</a>.</p> <p>The Virtuoso results discussed here, however, are all based on a single RDF quad table with Virtuoso's default index configuration.</p> <p>Introducing query plan caching raises the Virtuoso score from 80 qps to 144 qps at the 256 Mtriple scale. The SPB queries are not extremely complex; lookups with many more triple patterns exist in actual workloads, e.g., <a href="http://dbpedia.org/resource/OpenPHACTS" id="link-id0x7fd8721313e8">Open PHACTS</a>. In such applications, query optimization indeed dominates execution times. In SPB, data volumes touched by queries grow near linearly with data scale. At the 256 Mtriple scale, nearly half of CPU cycles are spent deciding a query plan. 
Below are the CPU cycles for execution and compilation per query type, sorted by descending sum of the times, scaled to milliseconds per execution. These are taken from a one minute sample of running at full throughput.</p> <p>Test system is the same used before in the <a href="http://www.openlinksw.com/weblog/oerling/?id=1739" id="link-id0x7fd872131738">TPC-H series</a>: dual Xeon E5-2630 Sandy Bridge, 2 x 6 cores x 2 threads, 2.3GHz, 192 GB RAM.</p> <p>We measure the compile and execute times, with and without using hash join. When considering hash join, the throughput is 80 qps. When not considering hash join, the throughput is 110 qps. With query plan caching, the throughput is 145 qps whether or not hash join is considered. Using hash join is not significant for the workload but considering its use in query optimization leads to significant extra work.</p> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <td> <h3>With hash join</h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Compile</th> <th>Execute</th> <th>Total</th> <th>Query</th> </tr> <tr> <td style="text-align:right;"> <code><i> 3156 ms </i></code> </td> <td style="text-align:right;"> <code><i> 1181 ms </i></code> </td> <td style="text-align:right;"> <code><i> 4337 ms </i></code> </td> <td style="text-align:right;"> <code><i> Total </i></code></td> </tr> <tr> <td style="text-align:right;"> <code> 1327 ms </code> </td> <td style="text-align:right;"> <code> 28 ms </code> </td> <td style="text-align:right;"> <code> 1355 ms </code> </td> <td style="text-align:right;"> <code> query 01 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 444 ms </code> </td> <td style="text-align:right;"> <code> 460 ms </code> </td> <td style="text-align:right;"> <code> 904 ms </code> </td> <td style="text-align:right;"> <code> query 08 </code></td> </tr> <tr> <td 
style="text-align:right;"> <code> 466 ms </code> </td> <td style="text-align:right;"> <code> 54 ms </code> </td> <td style="text-align:right;"> <code> 520 ms </code> </td> <td style="text-align:right;"> <code> query 06 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 123 ms </code> </td> <td style="text-align:right;"> <code> 268 ms </code> </td> <td style="text-align:right;"> <code> 391 ms </code> </td> <td style="text-align:right;"> <code> query 05 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 257 ms </code> </td> <td style="text-align:right;"> <code> 5 ms </code> </td> <td style="text-align:right;"> <code> 262 ms </code> </td> <td style="text-align:right;"> <code> query 11 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 191 ms </code> </td> <td style="text-align:right;"> <code> 59 ms </code> </td> <td style="text-align:right;"> <code> 250 ms </code> </td> <td style="text-align:right;"> <code> query 10 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 9 ms </code> </td> <td style="text-align:right;"> <code> 179 ms </code> </td> <td style="text-align:right;"> <code> 188 ms </code> </td> <td style="text-align:right;"> <code> query 04 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 114 ms </code> </td> <td style="text-align:right;"> <code> 26 ms </code> </td> <td style="text-align:right;"> <code> 140 ms </code> </td> <td style="text-align:right;"> <code> query 07 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 46 ms </code> </td> <td style="text-align:right;"> <code> 62 ms </code> </td> <td style="text-align:right;"> <code> 108 ms </code> </td> <td style="text-align:right;"> <code> query 09 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 71 ms </code> </td> <td style="text-align:right;"> <code> 25 ms </code> </td> <td style="text-align:right;"> <code> 96 ms </code> </td> <td style="text-align:right;"> <code> query 12 </code></td> </tr> <tr> <td style="text-align:right;"> 
<code> 61 ms </code> </td> <td style="text-align:right;"> <code> 13 ms </code> </td> <td style="text-align:right;"> <code> 74 ms </code> </td> <td style="text-align:right;"> <code> query 03 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 47 ms </code> </td> <td style="text-align:right;"> <code> 2 ms </code> </td> <td style="text-align:right;"> <code> 49 ms </code> </td> <td style="text-align:right;"> <code> query 02 </code></td> </tr> </table> </td> <td style="text-align:center;"> </td> <td style="text-align:center;"> <h3>Without hash join</h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Compile</th> <th>Execute</th> <th>Total</th> <th>Query</th> </tr> <tr> <td style="text-align:right;"> <code><i> 1816 ms </i></code> </td> <td style="text-align:right;"> <code><i> 1019 ms </i></code> </td> <td style="text-align:right;"> <code><i> 2835 ms </i></code> </td> <td style="text-align:right;"> <code><i> Total </i></code></td> </tr> <tr> <td style="text-align:right;"> <code> 197 ms </code> </td> <td style="text-align:right;"> <code> 466 ms </code> </td> <td style="text-align:right;"> <code> 663 ms </code> </td> <td style="text-align:right;"> <code> query 08 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 609 ms </code> </td> <td style="text-align:right;"> <code> 32 ms </code> </td> <td style="text-align:right;"> <code> 641 ms </code> </td> <td style="text-align:right;"> <code> query 01 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 188 ms </code> </td> <td style="text-align:right;"> <code> 293 ms </code> </td> <td style="text-align:right;"> <code> 481 ms </code> </td> <td style="text-align:right;"> <code> query 05 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 275 ms </code> </td> <td style="text-align:right;"> <code> 61 ms </code> </td> <td style="text-align:right;"> <code> 336 ms </code> </td> <td style="text-align:right;"> <code> query 09 
</code></td> </tr> <tr> <td style="text-align:right;"> <code> 163 ms </code> </td> <td style="text-align:right;"> <code> 10 ms </code> </td> <td style="text-align:right;"> <code> 173 ms </code> </td> <td style="text-align:right;"> <code> query 03 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 128 ms </code> </td> <td style="text-align:right;"> <code> 38 ms </code> </td> <td style="text-align:right;"> <code> 166 ms </code> </td> <td style="text-align:right;"> <code> query 10 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 102 ms </code> </td> <td style="text-align:right;"> <code> 5 ms </code> </td> <td style="text-align:right;"> <code> 107 ms </code> </td> <td style="text-align:right;"> <code> query 11 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 63 ms </code> </td> <td style="text-align:right;"> <code> 27 ms </code> </td> <td style="text-align:right;"> <code> 90 ms </code> </td> <td style="text-align:right;"> <code> query 12 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 24 ms </code> </td> <td style="text-align:right;"> <code> 57 ms </code> </td> <td style="text-align:right;"> <code> 81 ms </code> </td> <td style="text-align:right;"> <code> query 06 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 47 ms </code> </td> <td style="text-align:right;"> <code> 1 ms </code> </td> <td style="text-align:right;"> <code> 48 ms </code> </td> <td style="text-align:right;"> <code> query 02 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 15 ms </code> </td> <td style="text-align:right;"> <code> 24 ms </code> </td> <td style="text-align:right;"> <code> 39 ms </code> </td> <td style="text-align:right;"> <code> query 07 </code></td> </tr> <tr> <td style="text-align:right;"> <code> 5 ms </code> </td> <td style="text-align:right;"> <code> 5 ms </code> </td> <td style="text-align:right;"> <code> 10 ms </code> </td> <td style="text-align:right;"> <code> query 04 </code></td> </tr> </table> </td> 
</tr> </table> <p>Considering hash join always slows down compilation, and sometimes improves and sometimes worsens execution. Some improvement in cost-model and plan-space traversal-order is possible, but altogether removing compilation via caching is better still. The results are as expected, since a lookup workload such as SPB has little use for hash join by nature.</p> <p>The rationale for considering hash join in the first place is that analytical workloads rely heavily on this. A good <a href="http://www.tpc.org/tpch/" id="link-id0x7fd8721ce268">TPC-H</a> score is simply unfeasible without this as <a href="http://www.openlinksw.com/weblog/oerling/?id=1739" id="link-id0x7fd8720f3f48">previously discussed on this blog</a>. If RDF is to be a serious contender beyond serving lookups, then hash join is indispensable. The decision to use it, however, depends on accurate cardinality estimates on either side of the join.</p> <p>Previous work (e.g., papers from <a href="http://www.forth.gr/" id="link-id0x7fd8720f8548">FORTH</a> around <a href="http://dbpedia.org/resource/MonetDB" id="link-id0x7fd8721bfe38">MonetDB</a>) advocates doing away with a cost model altogether, since one is hard and unreliable with RDF anyway. The idea is not without its attraction but will lead to missing out on analytics or to relying on query hints for hash join.</p> <p>The present Virtuoso thinking is that going to rule-based optimization is not the preferred solution, but rather using characteristic sets for reducing triples into wider tables, which also cuts down on plan search space and increases reliability of cost estimation.</p> <p>When looking at execution alone, we see that actual database operations are low in the profile, with memory management taking the top 19%. 
This is due to <code>CONSTRUCT</code> queries allocating small blocks for returning graphs, which is entirely avoidable.</p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1867" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1867&title=New Semantic Publishing Benchmark Record" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1867&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1867&title=New Semantic Publishing Benchmark Record" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1867">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1867" name="1867" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1867">PermaLink</a> <a href="index.vspx?page=&id=1867&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">01/11/2016 15:22 GMT-0500</span> </td> <td 
align="right" colspan="3"> Modified: <span property="dct:modified"><span class="modified-date">01/11/2016 15:25 GMT-0500</span> </span> </td> </tr> </table> </div> </div> </div> <div id="post-1865"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1865"> <div class="post-title"> <span class="dc-title" property="dc:title">DBpedia Usage Report, August 2015</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-08-11#1865" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-08-11#1865" dc:title="DBpedia Usage Report, August 2015" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1865"></rdf:Description></rdf:RDF> --> <p>We recently published the latest <a href="http://bit.ly/1IL35Xu" id="link-id0x2aabbfb1e308">DBpedia Usage Report</a>, covering v3.3 (released July, 2009) to v3.10 (sometimes called "DBpedia 2014"; released September, 2014).</p> <p>The new report has usage data through July 31, 2015, and brought a few surprises to our eyes. 
What do you think?</p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1865" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1865&title=DBpedia Usage Report, August 2015" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1865&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1865&title=DBpedia Usage Report, August 2015" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1865">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1865" name="1865" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1865">PermaLink</a> <a href="index.vspx?page=&id=1865&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">08/11/2015 12:59 GMT-0500</span> </td> <td align="right" colspan="3"> </td> </tr> </table> </div> </div> </div> <div id="post-1863"> <div class="message" 
typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1863"> <div class="post-title"> <span class="dc-title" property="dc:title">Big Data, Part 2: Virtuoso Meets Impala</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-15#1863" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-15#1863" dc:title="Big Data, Part 2: Virtuoso Meets Impala" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1863"></rdf:Description></rdf:RDF> --> <p>In this article we will look at <a href="http://dbpedia.org/resource/Virtuoso_Universal_Server" id="link-id0x2aac0250bde8">Virtuoso</a> vs. <a href="https://dbpedia.org/resource/Cloudera_Impala" id="link-id0x2aac01e3c6d8">Impala</a> with 100G TPC-H on two R3.8 EC2 instances. We get a single user win for Virtuoso by a factor of 136, and a five user win by a factor of 55. The details and analysis follow.</p> <p>The load setup is the same as ever, with copying from CSV files attached as external tables into <a href="http://www.cloudera.com/content/cloudera/en/documentation/cloudera-impala/v2-0-x/topics/impala_parquet.html" id="link-id0x2aac014886d8">Parquet tables</a>. We get <code>lineitem</code> split over 88 Parquet files, which should provide enough parallelism for the platform. The Impala documentation states that there can be up to one thread per file, and here we wish to see maximum parallelism for a single query stream. 
We use the schema from the <a href="https://github.com/cloudera/impala" id="link-id0x2aac01cb0518">Impala github</a> checkout, with <code>string</code> for <code>string</code> and <code>date</code> columns, and <code>decimal</code> for <code>numbers</code>. We suppose the authors know what works best.</p> <p>The execution behavior is surprising. Sometimes we get full platform utilization, but quite often only 200% CPU per box. The query plan for Q1, for example, says 2 cores per box. This makes no sense, as the same plan fully well knows the table cardinality. The settings for scanner threads and cores to use (in <code>impala-shell</code>) can be changed, but the behavior does not seem to change.</p> <p>Following are the run times for one query stream.</p> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Query</th> <th>Virtuoso</th> <th>Impala</th> <th>Notes</th> </tr> <tr> <th style="text-align:center;"> — </th> <td style="text-align:right;"> <code> 332 s </code> </td> <td style="text-align:right;"> <code> 841 s </code> </td> <td style="text-align:left;"> Data Load </td> </tr> <tr> <th style="text-align:center;"> Q1 </th> <td style="text-align:right;"> <code> 1.098 s </code> </td> <td style="text-align:right;"> <code> 164.61 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q2 </th> <td style="text-align:right;"> <code> 0.187 s </code> </td> <td style="text-align:right;"> <code> 24.19 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q3 </th> <td style="text-align:right;"> <code> 0.761 s </code> </td> <td style="text-align:right;"> <code> 105.70 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q4 </th> <td style="text-align:right;"> <code> 0.205 s </code> </td> <td style="text-align:right;"> <code> 179.67 s </code> </td> <td style="text-align:left;"> </td> 
</tr> <tr> <th style="text-align:center;"> Q5 </th> <td style="text-align:right;"> <code> 0.808 s </code> </td> <td style="text-align:right;"> <code> 84.51 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q6 </th> <td style="text-align:right;"> <code> 2.403 s </code> </td> <td style="text-align:right;"> <code> 4.43 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q7 </th> <td style="text-align:right;"> <code> 0.59 s </code> </td> <td style="text-align:right;"> <code> 270.88 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q8 </th> <td style="text-align:right;"> <code> 0.775 s </code> </td> <td style="text-align:right;"> <code> 51.89 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q9 </th> <td style="text-align:right;"> <code> 1.836 s </code> </td> <td style="text-align:right;"> <code> 177.72 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q10 </th> <td style="text-align:right;"> <code> 3.165 s </code> </td> <td style="text-align:right;"> <code> 39.85 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q11 </th> <td style="text-align:right;"> <code> 1.37 s </code> </td> <td style="text-align:right;"> <code> 22.56 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q12 </th> <td style="text-align:right;"> <code> 0.356 s </code> </td> <td style="text-align:right;"> <code> 17.03 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q13 </th> <td style="text-align:right;"> <code> 2.233 s </code> </td> <td style="text-align:right;"> <code> 103.67 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q14 </th> <td style="text-align:right;"> <code> 0.488 s </code> </td> <td 
style="text-align:right;"> <code> 10.86 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q15 </th> <td style="text-align:right;"> <code> 0.72 s </code> </td> <td style="text-align:right;"> <code> 11.49 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q16 </th> <td style="text-align:right;"> <code> 0.814 s </code> </td> <td style="text-align:right;"> <code> 23.93 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q17 </th> <td style="text-align:right;"> <code> 0.681 s </code> </td> <td style="text-align:right;"> <code> 276.06 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q18 </th> <td style="text-align:right;"> <code> 1.324 s </code> </td> <td style="text-align:right;"> <code> 267.13 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q19 </th> <td style="text-align:right;"> <code> 0.417 s </code> </td> <td style="text-align:right;"> <code> 368.80 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q20 </th> <td style="text-align:right;"> <code> 0.792 s </code> </td> <td style="text-align:right;"> <code> 60.45 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q21 </th> <td style="text-align:right;"> <code> 0.720 s </code> </td> <td style="text-align:right;"> <code> 418.09 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q22 </th> <td style="text-align:right;"> <code> 0.155 s </code> </td> <td style="text-align:right;"> <code> 40.59 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Total </th> <td style="text-align:right;"> <code> 20 s </code> </td> <td style="text-align:right;"> <code> 2724 s </code> </td> <td style="text-align:left;"> </td> </tr> </table> <p>Because 
the platform utilization was often low, we made a second experiment running the same queries in five parallel sessions. We show the average execution time for each query. We then compare this with the Virtuoso throughput run average times. We permute the single query stream used in the first tests in 5 different orders, as per the TPC-H spec. The results are not entirely comparable, because Virtuoso is doing the refreshes in parallel. According to Impala documentation, there is no random delete operation, so the refreshes cannot be implemented.</p> <p>Just to establish a baseline, we do <code>SELECT COUNT (*) FROM lineitem</code>. This takes 20s when run by itself. When run in five parallel sessions, the fastest terminates in 64s and the slowest in 69s. Looking at <code>top</code>, the platform utilization is indeed about 5x more in CPU%, but the concurrency does not add much to throughput. This is odd, considering that there is no synchronization requirement worth mentioning between the operations.</p> <p>Following are the average times for each query in the 5 stream experiment.</p> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Query</th> <th>Virtuoso</th> <th>Impala</th> <th>Notes</th> </tr> <tr> <th style="text-align:center;"> Q1 </th> <td style="text-align:right;"> <code> 1.95 s </code> </td> <td style="text-align:right;"> <code> 191.81 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q2 </th> <td style="text-align:right;"> <code> 0.70 s </code> </td> <td style="text-align:right;"> <code> 40.40 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q3 </th> <td style="text-align:right;"> <code> 2.01 s </code> </td> <td style="text-align:right;"> <code> 95.67 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q4 </th> <td style="text-align:right;"> <code> 0.71 
s </code> </td> <td style="text-align:right;"> <code> 345.11 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q5 </th> <td style="text-align:right;"> <code> 2.93 s </code> </td> <td style="text-align:right;"> <code> 112.29 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q6 </th> <td style="text-align:right;"> <code> 4.76 s </code> </td> <td style="text-align:right;"> <code> 14.41 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q7 </th> <td style="text-align:right;"> <code> 2.08 s </code> </td> <td style="text-align:right;"> <code> 329.25 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q8 </th> <td style="text-align:right;"> <code> 3.00 s </code> </td> <td style="text-align:right;"> <code> 98.91 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q9 </th> <td style="text-align:right;"> <code> 5.58 s </code> </td> <td style="text-align:right;"> <code> 250.88 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q10 </th> <td style="text-align:right;"> <code> 8.23 s </code> </td> <td style="text-align:right;"> <code> 55.23 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q11 </th> <td style="text-align:right;"> <code> 4.26 s </code> </td> <td style="text-align:right;"> <code> 27.84 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q12 </th> <td style="text-align:right;"> <code> 1.74 s </code> </td> <td style="text-align:right;"> <code> 37.66 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q13 </th> <td style="text-align:right;"> <code> 6.07 s </code> </td> <td style="text-align:right;"> <code> 147.69 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th 
style="text-align:center;"> Q14 </th> <td style="text-align:right;"> <code> 1.73 s </code> </td> <td style="text-align:right;"> <code> 23.91 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q15 </th> <td style="text-align:right;"> <code> 2.27 s </code> </td> <td style="text-align:right;"> <code> 23.79 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q16 </th> <td style="text-align:right;"> <code> 2.41 s </code> </td> <td style="text-align:right;"> <code> 34.76 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q17 </th> <td style="text-align:right;"> <code> 3.92 s </code> </td> <td style="text-align:right;"> <code> 362.43 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q18 </th> <td style="text-align:right;"> <code> 3.02 s </code> </td> <td style="text-align:right;"> <code> 348.08 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q19 </th> <td style="text-align:right;"> <code> 2.27 s </code> </td> <td style="text-align:right;"> <code> 443.94 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q20 </th> <td style="text-align:right;"> <code> 3.05 s </code> </td> <td style="text-align:right;"> <code> 92.50 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q21 </th> <td style="text-align:right;"> <code> 2.00 s </code> </td> <td style="text-align:right;"> <code> 623.69 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q22 </th> <td style="text-align:right;"> <code> 0.37 s </code> </td> <td style="text-align:right;"> <code> 61.36 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Total for <br /> Slowest Stream </th> <td style="text-align:right;"> <code> 67 s </code> </td> 
<td style="text-align:right;"> <code> 3740 s </code> </td> <td style="text-align:left;"> </td> </tr> </table> <p>There are 4 queries in Impala that terminated with an error (<code>memory limit exceeded</code>). These were two Q21s, one Q19, and one Q4. One stream executed without errors, so this stream is reported as the slowest stream. Q21 will, in the absence of indexed access, do a hash build side of half of <code>lineitem</code>, which explains running out of memory. Virtuoso does Q21 mostly by index.</p> <p>Looking at the 5 streams, we see CPU between 1000% and 2000% on either box. This looks about 5x more than the 250% per box that we were seeing with, for instance, Q1. The process sizes for <code>impalad</code> are over 160G, certainly enough to have the working set in memory. <code>iostat</code> also does not show any <code>I/O</code>, so we seem to be running from memory, as intended.</p> <p>We observe that Impala does not store tables in any specific order. Therefore a merge join of <code>orders</code> and <code>lineitem</code> is not possible. Thus we always get a hash join with a potentially large build side, e.g., half of <code>orders</code> and half of <code>lineitem</code> in Q21, and all <code>orders</code> in Q9. This explains in part why these take so long. <a href="http://www.tpc.org/tpcds/" id="link-id0x2aac01486778">TPC-DS</a> does not pose this particular problem though, as there are no tables in the DS schema where the primary key of one would be the prefix of that of another.</p> <p>However, the <code>lineitem/orders</code> join does not explain the scores on Q1, Q20, or Q19. A simple hash join of <code>lineitem</code> and <code>part</code> was about 90s, with a replicated <code>part</code> hash table. In the profile, the hash probe was 74s, which seems excessive. One would have to single-step through the hash probe to find out what actually happens.
Maybe there are prohibitive numbers of collisions, which would throw off the results across the board. We would have to ask the Impala community about this.</p> <p>Anyway, <i><b>Impala experts out there are invited to set the record straight.</b></i> We have attached the results and the output of the Impala <code>profile</code> statement for each query for the single stream run. <code><a href="http://www.openlinksw.com/weblog/oerling/media/impala_stream0.zip" title="impala_stream0.zip" alt="impala_stream0.zip" id="link-id0x2aac0170e038">impala_stream0.zip</a></code> contains the evidence for the single-stream run; <code><a href="http://www.openlinksw.com/weblog/oerling/media/impala-stream1-5.zip" title="impala-stream1-5.zip" alt="impala-stream1-5.zip" id="link-id0x2aac0170e258">impala-stream1-5.zip</a></code> holds the 5-stream run.</p> <p>To be more Big Data-like, we should probably run with significantly larger data than memory; for example, 3T in 0.5T RAM. At EC2, we could do this with 2 I3.8 instances (6.4T SSD each). With Virtuoso, we'd be done in 8 hours or so, counting 2x for the I/O and 30x for the greater scale (the 100G experiment goes in 8 minutes or so, all included). With Impala, we could be running for weeks, so at the very least we'd like to do this with an Impala expert, to make sure things are done right and will not have to be retried. 
Some of the hash joins would have to be done in multiple passes and with partitioning.</p> <p>In subsequent articles, we will look at other players in this space, and possibly some other benchmarks, like the TPC-DS subset that <a href="http://www.actian.com/" id="link-id0x2aac0045dc78">Actian</a> uses to beat Impala.</p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1863" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1863&title=Big Data, Part 2: Virtuoso Meets Impala" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1863&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1863&title=Big Data, Part 2: Virtuoso Meets Impala" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1863">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1863" name="1863" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1863">PermaLink</a> <a href="index.vspx?page=&id=1863&cmf=1#comments">Comments 
[0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">07/15/2015 16:17 GMT-0500</span> </td> <td align="right" colspan="3"> </td> </tr> </table> </div> </div> </div> <div id="post-1861"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1861"> <div class="post-title"> <span class="dc-title" property="dc:title">Vectored Execution in Column/Row Stores</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-13#1861" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-13#1861" dc:title="Vectored Execution in Column/Row Stores" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1861"></rdf:Description></rdf:RDF> --> <p>This article discusses the relationship between vectored execution and column- and row-wise data representations. <a href="http://dbpedia.org/resource/Column-oriented_DBMS" id="link-id0x2aab8a00d208">Column stores</a> are traditionally considered to be good for big scans but poor at indexed access. This is not necessarily so, though. We take <a href="http://www.openlinksw.com/weblog/oerling/?id=1789" id="link-id0x2aab8a00d398">TPC-H Q9</a> as a starting point, working with different row- and column-wise data representations and index choices. 
The goal of the article is to provide a primer on the performance implications of different physical designs.</p> <p>All the experiments are against the TPC-H 100G dataset hosted in Virtuoso on the test system used before in the <a href="http://www.openlinksw.com/weblog/oerling/?id=1739" id="link-id0x2aab8bcec9d8">TPC-H series</a>: dual Xeon E5-2630, 2x6 cores x 2 threads, 2.3GHz, 192 GB RAM. The Virtuoso version corresponds to the <a href="https://github.com/v7fasttrack/virtuoso-opensource/tree/feature/analytics" id="link-id0x2aab8bcecbd8">feature/analytics branch</a> in the <a href="https://github.com/v7fasttrack/virtuoso-opensource/" id="link-id0x2aab8bceccb8">v7fasttrack github project</a>. All run times are from memory, and queries generally run at full platform, 24 concurrent threads.</p> <p>We note that RDF stores and graph databases usually do not have secondary indices with multiple key parts. However, these do predominantly index-based access as opposed to big scans and hash joins. To explore the impact of this, we have decomposed the tables into projections with a single dependent column, which approximates a triple store or a vertically-decomposed graph database like <a href="http://dbpedia.org/resource/DEX_(Graph_database)" id="link-id0x2aab8bced038">Sparksee</a>.</p> <p>So, in these experiments, we store the relevant data four times over, as follows:</p> <ul> <li> <p>100G TPC-H dataset in the column-wise schema as discussed in the TPC-H series, now complemented with indices on <code>l_partkey</code> and on <code>l_partkey, l_suppkey</code> </p> </li> <li> <p>The same in row-wise data representation </p> </li> <li> <p>Column-wise tables with a single dependent column for <code>l_partkey, l_suppkey, l_extendedprice, l_quantity, l_discount, ps_supplycost, s_nationkey, p_name</code>. 
These all have the original table's primary key, e.g., <code>l_orderkey, l_linenumber</code> for the <code>l_</code>-prefixed tables </p> </li> <li> <p>The same with row-wise tables</p> </li> </ul> <p>The column-wise structures are in the <code>DB</code> qualifier, and the row-wise are in the <code>R</code> qualifier. There is a summary of space consumption at the end of the article. This is relevant for scalability, since even if row-wise structures can be faster for scattered random access, they will fit less data in RAM, typically 2 to 3x less. Thus, if "faster" rows cause the working set not to fit, "slower" columns will still win.</p> <p>As a starting point, we know that the best Q9 is the one in the Virtuoso TPC-H implementation which is described in <a href="http://www.openlinksw.com/weblog/oerling/?id=1789" id="link-id0x2aab8a78d208">Part 10 of the TPC-H blog series</a>. This is a scan of <code>lineitem</code> with a selective hash join followed by ordered index access of <code>orders</code>, then hash joins against the smaller tables. There are special tricks to keep the hash tables small by propagating restrictions from the probe side to the build side.</p> <p>The query texts are <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/" id="link-id0x2aab8bc59538">available here</a>, along with the table declarations and scripts for populating the single-column projections.
<code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/rs.sql" id="link-id0x2aab8bc59778">rs.sql</a></code> makes the tables and indices, <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/rsload.sql" id="link-id0x2aab8bc59918">rsload.sql</a></code> copies the data from the TPC-H tables.</p> <p>The business question is to calculate the profit from sale of selected <code>parts</code> grouped by <code>year</code> and <code>country</code> of the <code>supplier</code>. This touches most of the tables, aggregates over 1/17 of all sales, and touches at least every page of the tables concerned, if not every row.</p> <blockquote> <code><pre>SELECT n_name AS nation, EXTRACT(year FROM o_orderdate) AS o_year, SUM (l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity) AS sum_profit FROM lineitem, part, partsupp, orders, supplier, nation WHERE s_suppkey = l_suppkey AND ps_suppkey = l_suppkey AND ps_partkey = l_partkey AND p_partkey = l_partkey AND o_orderkey = l_orderkey AND s_nationkey = n_nationkey AND p_name LIKE '%green%' GROUP BY nation, o_year ORDER BY nation, o_year DESC </pre> </code> </blockquote> <h2>Query Variants</h2> <p>The query variants discussed here are:</p> <ol> <li> <p>Hash based, the best plan -- <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9h.sql" id="link-id0x2aab8bd04788">9h.sql</a></code> </p> </li> <li> <p>Index based with multicolumn rows, with <code>lineitem</code> index on <code>l_partkey</code> -- <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9i.sql" id="link-id0x2aab8bd04bb8">9i.sql</a>, <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9ir.sql" id="link-id0x2aab88894bf8">9ir.sql</a></code> </p> </li> <li> <p>Index based with multicolumn rows, 
<code>lineitem</code> index on <code>l_partkey, l_suppkey</code> -- <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9ip.sql" id="link-id0x2aab88894fc8">9ip.sql</a>, <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9ipr.sql" id="link-id0x2aab888950c8">9ipr.sql</a></code> </p> </li> <li> <p>Index based with one table per dependent column, index on <code>l_partkey</code> -- <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9p.sql" id="link-id0x2aab8a4e42c8">9p.sql</a></code> </p> </li> <li> <p>Index based with one table per dependent column, with materialized <code>l_partkey, l_suppkey</code> -> <code>l_orderkey, l_linenumber</code> -- <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9pp.sql" id="link-id0x2aab8a4e4708">9pp.sql</a>, <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150713VectoredExecution/9ppr.sql" id="link-id0x2aab8a4e4808">9ppr.sql</a></code> </p> </li> </ol> <p>These are done against row- and column-wise data representations with 3 different vectorization settings. The dynamic vector size starts at 10,000 values in a vector, and adaptively upgrades this to 1,000,000 if it finds that index access is too sparse. Accessing rows close to each other is more efficient than accessing widely scattered rows in vectored index access, so using a larger vector will likely cause a denser, hence more efficient, access pattern.</p> <p>The 10K vector size corresponds to running with a fixed vector size. The Vector 1 setting sets the vector size to 1, effectively running a tuple at a time, which corresponds to a non-vectorized engine.</p> <p>We note that <code>lineitem</code> and its single column projections contain 600M rows. So, a vector of 10K values will hit, on the average, every 60,000th row.
A vector of 1,000,000 will thus hit every 600th. This is when doing random lookups that are in no specific order, e.g., getting <code>lineitems</code> by a secondary index on <code>l_partkey</code>.</p> <h3>1 — Hash-based plan</h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Vector</th> <th>Dynamic</th> <th>10k</th> <th>1</th> </tr> <tr> <th style="text-align:left;">Column-wise </th> <td style="text-align:right;"><code>4.1 s</code> </td> <td style="text-align:right;"><code>4.1 s</code> </td> <td style="text-align:right;"><code>145 s</code> </td> </tr> <tr> <th style="text-align:left;">Row-wise </th> <td style="text-align:right;"><code>25.6 s</code> </td> <td style="text-align:right;"><code>25.9 s</code> </td> <td style="text-align:right;"><code>45.4 s</code> </td> </tr> </table> <p>Dynamic vector size has no effect here, as there is no indexed access that would gain from more locality. The column store is much faster because of less memory access (just scan the <code>l_partkey</code> column, and filter this with a Bloom filter; and then hash table lookup to pick only items with the desired <code>part</code>). The other columns are accessed only for the matching rows. The hash lookup is vectored since there are hundreds of compressed <code>l_partkey</code> values available at each time. The row store does the hash lookup row by row, hence losing cache locality and instruction-level parallelism.</p> <p>Without vectorization, we have a situation where the <code>lineitem</code> scan emits one row at a time. Restarting the scan with the column store takes much longer, since 5 buffers have to be located and pinned instead of one for the row store. 
The row store is thus slowed down less, but it too suffers almost a factor of 2 from interpretation overhead.</p> <h3>2 — Index-based, <code>lineitem</code> indexed on <code>l_partkey</code> </h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Vector</th> <th>Dynamic</th> <th>10k</th> <th>1</th> </tr> <tr> <th style="text-align:left;">Column-wise </th> <td style="text-align:right;"><code> 30.4 s </code> </td> <td style="text-align:right;"><code> 62.3 s </code> </td> <td style="text-align:right;"><code> 321 s</code> </td> </tr> <tr> <th style="text-align:left;">Row-wise </th> <td style="text-align:right;"><code> 31.8 s</code> </td> <td style="text-align:right;"><code> 27.7 s</code> </td> <td style="text-align:right;"><code> 122 s </code> </td> </tr> </table> <p>Here the plan scans <code>part</code>, then <code>partsupp</code>, which shares ordering with <code>part</code>; both are ordered on <code>partkey</code>. Then <code>lineitem</code> is fetched by a secondary index on <code>l_partkey</code>. This produces <code>l_orderkey, l_linenumber</code>, which are used to get the <code>l_suppkey</code>. We then check if the <code>l_suppkey</code> matches the <code>ps_suppkey</code> from <code>partsupp</code>, which drops 3/4 of the rows. The next join is on <code>orders</code>, which shares ordering with <code>lineitem</code>; both are ordered on <code>orderkey</code>.</p> <p>There is a narrow win for columns with dynamic vector size. When access becomes scattered, rows win by 2.5x, because there is only one page to access instead of 1 + 3 for columns. This is compensated for if the next item is found on the same page, which happens if the access pattern is denser.
</p> <h3>3 — Index-based, <code>lineitem</code> indexed on <code>l_partkey, l_suppkey</code> </h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Vector</th> <th>Dynamic</th> <th>10k</th> <th>1</th> </tr> <tr> <th style="text-align:left;">Column-wise </th> <td style="text-align:right;"><code> 16.9 s</code> </td> <td style="text-align:right;"><code> 47.2 s </code> </td> <td style="text-align:right;"><code> 151 s </code> </td> </tr> <tr> <th style="text-align:left;">Row-wise </th> <td style="text-align:right;"><code> 22.4 s</code> </td> <td style="text-align:right;"><code> 20.7 s</code> </td> <td style="text-align:right;"><code> 89 s </code> </td> </tr> </table> <p>This is similar to the previous, except that now only <code>lineitems</code> that match <code>ps_partkey, ps_suppkey</code> are accessed, as the secondary index has two columns. Access is more local. Columns thus win more with dynamic vector size.</p> <h3>4 — Decomposed, index on <code>l_partkey</code> </h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Vector</th> <th>Dynamic</th> <th>10k</th> <th>1</th> </tr> <tr> <th style="text-align:left;">Column-wise </th> <td style="text-align:right;"><code> 35.7 s </code> </td> <td style="text-align:right;"><code> 170 s </code> </td> <td style="text-align:right;"><code> 601 s </code> </td> </tr> <tr> <th style="text-align:left;">Row-wise </th> <td style="text-align:right;"><code> 44.5 s </code> </td> <td style="text-align:right;"><code> 56.2 s </code> </td> <td style="text-align:right;"><code> 130 s </code> </td> </tr> </table> <p>Now, each of the <code>l_extendedprice, l_discount, l_quantity</code> and <code>l_suppkey</code> is a separate index lookup.
The times are slightly higher but the dynamic is the same.</p> <p>The non-vectored columns case is hit the hardest.</p> <h3>5 — Decomposed, index on <code>l_partkey, l_suppkey</code> </h3> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Vector</th> <th>Dynamic</th> <th>10k</th> <th>1</th> </tr> <tr> <th style="text-align:left;">Column-wise </th> <td style="text-align:right;"><code> 19.6 s </code> </td> <td style="text-align:right;"><code> 111 s </code> </td> <td style="text-align:right;"><code> 257 s </code> </td> </tr> <tr> <th style="text-align:left;">Row-wise </th> <td style="text-align:right;"><code> 32.0 s </code> </td> <td style="text-align:right;"><code> 37 s </code> </td> <td style="text-align:right;"><code> 74.9 s </code> </td> </tr> </table> <p>Again, we see the same dynamic as with a multicolumn table. Columns win slightly more at long vector sizes because of overall better index performance in the presence of locality.</p> <h2>Space Utilization </h2> <p>The following tables list the space consumption in megabytes of allocated pages. Unallocated space in database files is not counted.</p> <p>The row-wise table also contains entries for column-wise structures (<code>DB.*</code>) since these have a row-wise sparse index. 
The size of this is however negligible, under 1% of the column-wise structures.</p> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr> <th style="text-align: center;">Row-Wise</th> <th> </th> <th style="text-align: center;"> Column-Wise</th> </tr> <tr> <td style="vertical-align:top;"> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>MB</th> <th>structure</th> </tr> <tr> <td style="text-align:right;"><code>73515</code> </td> <td style="text-align:left;"><code>R.DBA.LINEITEM</code> </td> </tr> <tr> <td style="text-align:right;"><code>14768</code> </td> <td style="text-align:left;"><code>R.DBA.ORDERS</code> </td> </tr> <tr> <td style="text-align:right;"><code>11728</code> </td> <td style="text-align:left;"><code>R.DBA.PARTSUPP</code> </td> </tr> <tr> <td style="text-align:right;"><code>10161</code></td> <td style="text-align:left;"><code>r_lpk_pk</code> </td> </tr> <tr> <td style="text-align:right;"><code>10003</code></td> <td style="text-align:left;"><code>r_l_pksk</code> </td> </tr> <tr> <td style="text-align:right;"><code>9908</code></td> <td style="text-align:left;"><code>R.DBA.l_partkey</code> </td> </tr> <tr> <td style="text-align:right;"><code>8761</code></td> <td style="text-align:left;"><code>R.DBA.l_extendedprice</code> </td> </tr> <tr> <td style="text-align:right;"><code>8745</code></td> <td style="text-align:left;"><code>R.DBA.l_discount</code> </td> </tr> <tr> <td style="text-align:right;"><code>8738</code></td> <td style="text-align:left;"><code>r_l_pk</code> </td> </tr> <tr> <td style="text-align:right;"><code>8713</code></td> <td style="text-align:left;"><code>R.DBA.l_suppkey</code> </td> </tr> <tr> <td style="text-align:right;"><code>6267</code></td> <td style="text-align:left;"><code>R.DBA.l_quantity</code> </td> </tr> <tr> <td style="text-align:right;"><code>2223</code></td> <td style="text-align:left;"><code>R.DBA.CUSTOMER</code> </td> 
</tr> <tr> <td style="text-align:right;"><code>2180</code></td> <td style="text-align:left;"><code>R.DBA.o_orderdate</code> </td> </tr> <tr> <td style="text-align:right;"><code>2041</code></td> <td style="text-align:left;"><code>r_O_CK</code> </td> </tr> <tr> <td style="text-align:right;"><code>1911</code></td> <td style="text-align:left;"><code>R.DBA.PART</code> </td> </tr> <tr> <td style="text-align:right;"><code>1281</code></td> <td style="text-align:left;"><code>R.DBA.ps_supplycost</code> </td> </tr> <tr> <td style="text-align:right;"><code>811</code></td> <td style="text-align:left;"><code>R.DBA.p_name</code> </td> </tr> <tr> <td style="text-align:right;"><code>127</code></td> <td style="text-align:left;"><code>R.DBA.SUPPLIER</code> </td> </tr> <tr> <td style="text-align:right;"><code>88</code></td> <td style="text-align:left;"><code>DB.DBA.LINEITEM</code> </td> </tr> <tr> <td style="text-align:right;"><code>24</code></td> <td style="text-align:left;"><code>DB.DBA.ORDERS</code> </td> </tr> <tr> <td style="text-align:right;"><code>11</code></td> <td style="text-align:left;"><code>DB.DBA.PARTSUPP</code> </td> </tr> <tr> <td style="text-align:right;"><code>9</code></td> <td style="text-align:left;"><code>R.DBA.s_nationkey</code> </td> </tr> <tr> <td style="text-align:right;"><code>5</code></td> <td style="text-align:left;"><code>l_pksk</code> </td> </tr> <tr> <td style="text-align:right;"><code>4</code></td> <td style="text-align:left;"><code>DB.DBA.l_partkey</code> </td> </tr> <tr> <td style="text-align:right;"><code>4</code></td> <td style="text-align:left;"><code>lpk_pk</code> </td> </tr> <tr> <td style="text-align:right;"><code>4</code></td> <td style="text-align:left;"><code>DB.DBA.l_extendedprice</code> </td> </tr> <tr> <td style="text-align:right;"><code>3</code></td> <td style="text-align:left;"><code>l_pk</code> </td> </tr> <tr> <td style="text-align:right;"><code>3</code></td> <td style="text-align:left;"><code>DB.DBA.l_suppkey</code> </td> </tr> <tr> 
<td style="text-align:right;"><code>2</code></td> <td style="text-align:left;"><code>DB.DBA.CUSTOMER</code> </td> </tr> <tr> <td style="text-align:right;"><code>2</code></td> <td style="text-align:left;"><code>DB.DBA.l_quantity</code> </td> </tr> <tr> <td style="text-align:right;"><code>1</code></td> <td style="text-align:left;"><code>DB.DBA.PART</code> </td> </tr> <tr> <td style="text-align:right;"><code>1</code></td> <td style="text-align:left;"><code>O_CK</code> </td> </tr> <tr> <td style="text-align:right;"><code>1</code></td> <td style="text-align:left;"><code>DB.DBA.l_discount</code> </td> </tr> </table> </td> <td> </td> <td style="vertical-align:top;"> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>MB</th> <th>structure</th> </tr> <tr> <td style="text-align:right;"><code>36482</code> </td> <td style="text-align:left;"><code>DB.DBA.LINEITEM</code> </td> </tr> <tr> <td style="text-align:right;"><code>13087</code></td> <td style="text-align:left;"><code>DB.DBA.ORDERS</code> </td> </tr> <tr> <td style="text-align:right;"><code>11587</code></td> <td style="text-align:left;"><code>DB.DBA.PARTSUPP</code> </td> </tr> <tr> <td style="text-align:right;"><code>5181</code></td> <td style="text-align:left;"><code>DB.DBA.l_extendedprice</code> </td> </tr> <tr> <td style="text-align:right;"><code>4431</code></td> <td style="text-align:left;"><code>l_pksk</code> </td> </tr> <tr> <td style="text-align:right;"><code>3072</code></td> <td style="text-align:left;"><code>DB.DBA.l_partkey</code> </td> </tr> <tr> <td style="text-align:right;"><code>2958</code></td> <td style="text-align:left;"><code>lpk_pk</code> </td> </tr> <tr> <td style="text-align:right;"><code>2918</code></td> <td style="text-align:left;"><code>l_pk</code> </td> </tr> <tr> <td style="text-align:right;"><code>2835</code></td> <td style="text-align:left;"><code>DB.DBA.l_suppkey</code> </td> </tr> <tr> <td 
style="text-align:right;"><code>2067</code></td> <td style="text-align:left;"><code>DB.DBA.CUSTOMER</code> </td> </tr> <tr> <td style="text-align:right;"><code>1618</code></td> <td style="text-align:left;"><code>DB.DBA.PART</code> </td> </tr> <tr> <td style="text-align:right;"><code>1156</code></td> <td style="text-align:left;"><code>DB.DBA.l_quantity</code> </td> </tr> <tr> <td style="text-align:right;"><code>961</code></td> <td style="text-align:left;"><code>DB.DBA.ps_supplycost</code> </td> </tr> <tr> <td style="text-align:right;"><code>814</code></td> <td style="text-align:left;"><code>O_CK</code> </td> </tr> <tr> <td style="text-align:right;"><code>798</code></td> <td style="text-align:left;"><code>DB.DBA.l_discount</code> </td> </tr> <tr> <td style="text-align:right;"><code>724</code></td> <td style="text-align:left;"><code>DB.DBA.p_name</code> </td> </tr> <tr> <td style="text-align:right;"><code>436</code></td> <td style="text-align:left;"><code>DB.DBA.o_orderdate</code> </td> </tr> <tr> <td style="text-align:right;"><code>126</code></td> <td style="text-align:left;"><code>DB.DBA.SUPPLIER</code> </td> </tr> <tr> <td style="text-align:right;"><code>1</code></td> <td style="text-align:left;"><code>DB.DBA.s_nationkey</code> </td> </tr> </table> </td> </tr> </table> <p>In both cases, the large tables are on top, but the column-wise case takes only half the space due to compression. </p> <p>We note that the single column projections are smaller column-wise. The <code>l_extendedprice</code> is not very compressible hence column-wise takes much more space than <code>l_quantity</code>; the row-wise difference is less. 
Since the leading key parts <code>l_orderkey, l_linenumber</code> are ordered and very compressible, the column-wise structures are in all cases noticeably more compact.</p> <p>The same applies to the multipart index <code>l_pksk</code> and <code>r_l_pksk</code> (<code>l_partkey, l_suppkey, l_orderkey, l_linenumber</code>) in column- and row-wise representations.</p> <p>Note that <code>STRING</code> columns (e.g., <code>l_comment</code>) are not compressed. If they were, the overall space ratio would be even more to the advantage of the column store.</p> <h2>Conclusions </h2> <p>Column stores and vectorization inextricably belong together. Column-wise compression yields great gains also for indices, since sorted data is easy to compress. Also for non-sorted data, adaptive use of dictionaries, run lengths, etc., produce great space savings. Columns also win with indexed access if there is locality.</p> <p>Row stores have less dependence on locality, but they also will win by a factor of 3 from dropping interpretation overhead and exploiting join locality.</p> <p>For point lookups, columns lose by 2+x but considering their better space efficiency, they will still win if space savings prevent going to secondary storage. For bulk random access, like in graph analytics, columns will win because of being able to operate on a large vector of keys to fetch.</p> <p>For many workloads, from TPC-H to LDBC social network, multi-part keys are a necessary component of physical design for performance if indexed access predominates. Triple stores and most graph databases do not have such and are therefore at a disadvantage. Self-joining, like in RDF or other vertically decomposed structures, can cost up to a factor of 10-20 over a column-wise multicolumn table. This depends however on the density of access.</p> <p>For analytical workloads, where the dominant join pattern is the scan with selective hash join, column stores are unbeatable, as per common wisdom. 
There are good physical reasons for this and the row store even with well implemented vectorization loses by a factor of 5.</p> <p>For decomposed structures, like RDF quads or single column projections of tables, column stores are relatively more advantageous because the key columns are extensively repeated, and these compress better with columns than with rows. In all the RDF workloads we have tried, columns never lose, but there is often a draw between rows and columns for lookup workloads. The longer the query, the more columns win.</p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1861" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1861&title=Vectored Execution in Column/Row Stores" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1861&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1861&title=Vectored Execution in Column/Row Stores" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1861">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div 
class="post-actions"> <a id="post_anchor1861" name="1861" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1861">PermaLink</a> <a href="index.vspx?page=&id=1861&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">07/13/2015 13:49 GMT-0500</span> </td> <td align="right" colspan="3"> Modified: <span property="dct:modified"><span class="modified-date">07/13/2015 13:56 GMT-0500</span> </span> </td> </tr> </table> </div> </div> </div> <div id="post-1859"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1859"> <div class="post-title"> <span class="dc-title" property="dc:title">Virtuoso at SIGMOD 2015</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-13#1859" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-13#1859" dc:title="Virtuoso at SIGMOD 2015" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1859"></rdf:Description></rdf:RDF> --> <p>Two papers presented at <a href="http://dbpedia.org/resource/SIGMOD" id="link-id0x10f5b4448">SIGMOD</a> <a href="http://www.sigmod2015.org/" id="link-id0x10f84b6e8">2015</a> have been added to the <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VirtuosoScienceLibrary" id="link-id0x10f84b8c8">Virtuoso Science Library</a>.</p> <ul> <li> <p> <b>Orri Erling (OpenLink Software); Alex Averbuch (Neo Technology); Josep Larriba-Pey (Sparsity Technologies); Hassan Chafi (Oracle Labs); Andrey Gubichev (TU Munich); Arnau Prat-Pérez (Universitat Politècnica 
de Catalunya); Minh-Duc Pham (VU University Amsterdam); Peter Boncz (CWI): <a href="http://dl.acm.org/authorize.cfm?key=N97179" id="link-id0x10f7147b8">The LDBC Social Network Benchmark: Interactive Workload</a>. <a href="http://www.sigmod2015.org/toc_sigmod.shtml" id="link-id0x10f7148d8">Proceedings of SIGMOD 2015, Melbourne</a>.</b> </p> <p>This paper is an overview of the challenges posed in the LDBC social network benchmark, from data generation to the interactive workload.</p> </li> <li> <p> <b>Mihai Capotă (Delft University of Technology), Tim Hegeman (Delft University of Technology), Alexandru Iosup (Delft University of Technology), Arnau Prat-Pérez (Universitat Politècnica de Catalunya), Orri Erling (OpenLink Software), Peter Boncz (CWI): <a href="http://dl.acm.org/authorize.cfm?key=N97204" id="link-id0x110864288">Graphalytics: A Big Data Benchmark for Graph-Processing Platforms</a>. <a href="http://www.sigmod2015.org/toc_grades.html" id="link-id0x1108643a8">Sigmod GRADES 2015</a>.</b> </p> <p>This paper discusses the future evolution of the LDBC Social Network Benchmark and gives a preview of Virtuoso graph traversal performance.</p> </li> </ul> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1859" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1859&title=Virtuoso at SIGMOD 2015" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1859&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" 
title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1859&title=Virtuoso at SIGMOD 2015" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1859">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1859" name="1859" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1859">PermaLink</a> <a href="index.vspx?page=&id=1859&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">07/13/2015 12:52 GMT-0500</span> </td> <td align="right" colspan="3"> </td> </tr> </table> </div> </div> </div> <div id="post-1857"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1857"> <div class="post-title"> <span class="dc-title" property="dc:title">Big Data, Part 1: Virtuoso Meets Hive</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-13#1857" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-07-13#1857" dc:title="Big Data, Part 1: Virtuoso Meets Hive" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1857"></rdf:Description></rdf:RDF> --> <p>In this series, we will look at <a 
href="http://dbpedia.org/resource/Virtuoso_Universal_Server" id="link-id0x2aab3b3c1df8">Virtuoso</a> and some of the big data technologies out there. <a href="http://dbpedia.org/resource/SQL" id="link-id0x2aab3b3c1f08">SQL</a> on <a href="http://dbpedia.org/resource/Apache_Hadoop" id="link-id0x2aab3b3c2028">Hadoop</a> is of interest, as well as <a href="http://dbpedia.org/resource/NoSQL" id="link-id0x2aab3b3c2148">NoSQL</a> technologies.</p> <p>We begin at the beginning, with <a href="http://dbpedia.org/resource/Apache_Hive" id="link-id0x2aab3b3c2168">Hive</a>, the grand-daddy of SQL on Hadoop.</p> <p>The test platform is two Amazon R3.8 <a href="http://dbpedia.org/resource/Amazon_Machine_Image" id="link-id0x2aab62f44bf8">AMI</a> instances. We compared Hive with the Virtuoso 100G TPC-H experiment on the same platform, <a href="http://www.openlinksw.com/weblog/oerling/?id=1845" id="link-id0x2aab62f44d78">published earlier on this blog</a>. The runs follow a bulk load in both cases, with all data served from memory. The platform has 2x244GB RAM with only 40GB or so of working set.</p> <p>The Virtuoso version and settings are as in the <a href="http://www.openlinksw.com/weblogs/oerling/?id=1849" id="link-id0x2aab3be6df88">Virtuoso Cluster test AMI</a>.</p> <p>The Hive version is <a href="http://hortonworks.com/blog/announcing-apache-hive-0-14/" id="link-id0x2aab3be6e148">0.14</a> from the <a href="http://dbpedia.org/resource/Hortonworks" id="link-id0x2aab3be6e298">Hortonworks</a> <a href="http://hortonworks.com/hdp/downloads/" id="link-id0x2aab3b149c48">HDP 2.2 distribution</a>. The Hive schema and query formulations are the ones from <a href="https://github.com/hortonworks/hive-testbench" id="link-id0x2aab3b1498c8"><code>hive-testbench</code> on GitHub</a>. The Hive configuration parameters are as set by <a href="https://cwiki.apache.org/confluence/display/AMBARI/Installation+Guide+for+Ambari+2.0.1" id="link-id0x2aab3b149aa8">Ambari 2.0.1</a>.
These are different from the ones in <code>hive-testbench</code>, but the Ambari choices offer higher performance on the platform. We did run statistics with Hive and did not specify any settings not in the <code>hive-testbench</code>. Thus we suppose the query plans were as good as Hive will make them. Platform utilization was even across both machines, and varied between 30% and 100% of the 2 x 32 hardware threads.</p> <p>Load time with Hive was 742 seconds against 232 seconds with Virtuoso. In both cases, this was a copy from 32 <a href="http://dbpedia.org/resource/Comma-separated_values" id="link-id0x2aab3be6e328">CSV</a> files into native database format; for Hive, this is <a href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+ORC" id="link-id0x2aab3b149868">ORC (Optimized Row Columnar)</a>. In Virtuoso, there is one index, (<code>o_custkey</code>); in Hive, there are no indices.</p> <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Query</th> <th>Virtuoso</th> <th>Hive</th> <th>Notes</th> </tr> <tr> <th style="text-align:center;"> — </th> <td style="text-align:right;"> <code> 332 s </code> </td> <td style="text-align:right;"> <code> 742 s </code> </td> <td style="text-align:left;"> Data Load </td> </tr> <tr> <th style="text-align:center;"> Q1 </th> <td style="text-align:right;"> <code> 1.098 s </code> </td> <td style="text-align:right;"> <code> 296.636 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q2 </th> <td style="text-align:right;"> <code> 0.187 s </code> </td> <td style="text-align:right;"> <code> >3600 s </code> </td> <td style="text-align:left;"> Hive Timeout </td> </tr> <tr> <th style="text-align:center;"> Q3 </th> <td style="text-align:right;"> <code> 0.761 s </code> </td> <td style="text-align:right;"> <code> 98.652 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;">
Q4 </th> <td style="text-align:right;"> <code> 0.205 s </code> </td> <td style="text-align:right;"> <code> 147.867 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q5 </th> <td style="text-align:right;"> <code> 0.808 s </code> </td> <td style="text-align:right;"> <code> 114.782 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q6 </th> <td style="text-align:right;"> <code> 2.403 s </code> </td> <td style="text-align:right;"> <code> 71.789 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q7 </th> <td style="text-align:right;"> <code> 0.59 s </code> </td> <td style="text-align:right;"> <code> 394.201 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q8 </th> <td style="text-align:right;"> <code> 0.775 s </code> </td> <td style="text-align:right;"> <code> >3600 s </code> </td> <td style="text-align:left;"> Hive Timeout </td> </tr> <tr> <th style="text-align:center;"> Q9 </th> <td style="text-align:right;"> <code> 1.836 s </code> </td> <td style="text-align:right;"> <code> >3600 s </code> </td> <td style="text-align:left;"> Hive Timeout </td> </tr> <tr> <th style="text-align:center;"> Q10 </th> <td style="text-align:right;"> <code> 3.165 s </code> </td> <td style="text-align:right;"> <code> 179.646 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q11 </th> <td style="text-align:right;"> <code> 1.37 s </code> </td> <td style="text-align:right;"> <code> 43.094 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q12 </th> <td style="text-align:right;"> <code> 0.356 s </code> </td> <td style="text-align:right;"> <code> 101.193 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q13 </th> <td style="text-align:right;"> <code> 2.233 s </code> </td> <td 
style="text-align:right;"> <code> 208.476 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q14 </th> <td style="text-align:right;"> <code> 0.488 s </code> </td> <td style="text-align:right;"> <code> 89.047 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q15 </th> <td style="text-align:right;"> <code> 0.72 s </code> </td> <td style="text-align:right;"> <code> 136.431 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q16 </th> <td style="text-align:right;"> <code> 0.814 s </code> </td> <td style="text-align:right;"> <code> 105.652 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q17 </th> <td style="text-align:right;"> <code> 0.681 s </code> </td> <td style="text-align:right;"> <code> 255.848 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q18 </th> <td style="text-align:right;"> <code> 1.324 s </code> </td> <td style="text-align:right;"> <code> 337.921 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q19 </th> <td style="text-align:right;"> <code> 0.417 s </code> </td> <td style="text-align:right;"> <code> >3600 s </code> </td> <td style="text-align:left;"> Hive Timeout </td> </tr> <tr> <th style="text-align:center;"> Q20 </th> <td style="text-align:right;"> <code> 0.792 s </code> </td> <td style="text-align:right;"> <code> 193.965 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q21 </th> <td style="text-align:right;"> <code> 0.720 s </code> </td> <td style="text-align:right;"> <code> 670.718 s </code> </td> <td style="text-align:left;"> </td> </tr> <tr> <th style="text-align:center;"> Q22 </th> <td style="text-align:right;"> <code> 0.155 s </code> </td> <td style="text-align:right;"> <code> 68.462 s </code> </td> <td style="text-align:left;"> 
</td> </tr> </table> <p>Hive does relatively best on bulk load. This is understandable since this is a sequential read of many files in parallel with just compression to do.</p> <p>Hive's query times are obviously affected by not having a persistent memory image of the data, as this is always streamed from the storage files into other files as <a href="http://dbpedia.org/resource/MapReduce" id="link-id0x2aab3b57ad38">MapReduce</a> intermediate results. This seems to be an operator-at-a-time business as opposed to Virtuoso's vectorized streaming.</p> <p>The queries that would do partitioned <a href="http://dbpedia.org/resource/Hash_join" id="link-id0x2aab3bec82d8">hash joins</a> (e.g., Q9) did not finish under an hour in Hive, so we do not have a good metric of a cross-partition hash join.</p> <p>One could argue that one should benchmark Hive only in disk-bound circumstances. We may yet get to this.</p> <p>Our next stop will probably be <a href="https://en.wikipedia.org/wiki/Cloudera_Impala" id="link-id0x2aab3b4f7bc8">Impala</a>, which ought to do much better than Hive, as it does not have the MapReduce overheads.</p> <p> <i><b>If you are a Hive expert</b> and believe that Hive should have done much better, please let us know how to improve the Hive scores, and we will retry.</i> </p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1857" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1857&title=Big Data, Part 1: Virtuoso Meets Hive" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a
href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1857&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1857&title=Big Data, Part 1: Virtuoso Meets Hive" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1857">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1857" name="1857" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1857">PermaLink</a> <a href="index.vspx?page=&id=1857&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">07/13/2015 12:17 GMT-0500</span> </td> <td align="right" colspan="3"> </td> </tr> </table> </div> </div> </div> <div id="post-1855"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1855"> <div class="post-title"> <span class="dc-title" property="dc:title">Rethink Big and Europe’s Position in Big Data</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-29#1855" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-29#1855" dc:title="Rethink
Big and Europe’s Position in Big Data" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1855"></rdf:Description></rdf:RDF> --> <p>I will here take a break from core database and talk a bit about <a href="http://dbpedia.org/page/European_Union" id="link-id0x2aab76cf53a8">EU</a> policies for research funding.</p> <p>I had lunch with <a href="http://homepages.cwi.nl/~manegold/" id="link-id0x2aab74e7ff38">Stefan Manegold</a> of <a href="http://www.cwi.nl/" id="link-id0x2aab76fdf4a8">CWI</a> last week, where we talked about where European research should go. Stefan is involved in <i><a href="http://www.rethinkbig-project.eu/" id="link-id0x2aab767d4d88">RETHINK big</a>,</i> a European research project for compiling policy advice regarding big data for EC funding agencies. As part of this, he is interviewing various stakeholders such as end user organizations and developers of technology.</p> <p> <i>RETHINK big</i> wants to come up with a research agenda primarily for hardware, anything from faster networks to greener data centers. CWI represents software expertise in the consortium.</p> <p>So, we went through a regular questionnaire about how we see the landscape. I will summarize this below, as this is anyway informative.</p> <h3>Core competence</h3> <p>My own core competence is in core database functionality, specifically in high performance query processing, scale-out, and managing schema-less data.
Most of the <a href="http://dbpedia.org/page/Virtuoso_Universal_Server" id="link-id0x2aab766406e8">Virtuoso</a> installed base is in the <a href="http://dbpedia.org/page/Resource_Description_Framework" id="link-id0x2aab74871c28">RDF</a> space, but most potential applications are in fact outside of this niche.</p> <h3>User challenges</h3> <p>The life sciences vertical is the one in which I have the most application insight, from going to <a href="http://dbpedia.org/resource/OpenPHACTS" id="link-id0x2aab765598c8">Open PHACTS</a> meetings and holding extensive conversations with domain specialists. We have users in many other verticals, from manufacturing to financial services, but there I do not have as much exposure to the actual applications.</p> <p>Having said this, the challenges throughout tend to be in diversity of data. Every researcher has their <a href="http://dbpedia.org/page/MySQL" id="link-id0x2aab76bff778">MySQL</a> database or <a href="http://dbpedia.org/page/Spreadsheet" id="link-id0x2aab75ceeaa8">spreadsheet</a>, and there may not even be a top level catalogue of everything. Data formats are diverse. Some people use <a href="http://dbpedia.org/resource/Linked_data" id="link-id0x2aab74e675e8">linked data</a> (most commonly RDF) as a top level metadata format. The application data, such as gene sequences or microarray assays, reside in their native file formats and there is little point in RDF-izing these.</p> <p>There are also public data resources that are published in RDF serializations as vendor-neutral, self-describing format. Having everything as triples, without <i>a priori</i> schema, makes things easier to integrate and in some cases easier to describe and query.</p> <p>So, the challenge is in the labor intensive nature of data integration. Data comes with different levels of quantity and quality, from hand-curated to <a href="http://dbpedia.org/resource/Natural_language_processing" id="link-id0x2aab76dfe0c8">NLP</a> extractions. 
Querying in the single- or double-digit terabyte range with RDF is quite possible, as we have shown many times on this blog, but most use cases do not even go that far. Anyway, what we see in the field is primarily a data diversity game. The scenario is data integration; the technology we provide is database. The data transformation proper, data cleansing, units of measure, entity de-duplication, and such core data-integration functions are performed using diverse, user-specific means.</p> <p> <a href="https://ch.linkedin.com/in/jervenbolleman" id="link-id0x2aab754e5e38">Jerven Bolleman</a> of the <a href="https://www.linkedin.com/company/sib-swiss-institute-of-bioinformatics" id="link-id0x2aab75d891d8">Swiss Institute of Bioinformatics</a> is a user of ours with whom we have long standing discussions on the virtues of federated data and querying. I advised Stefan to go talk to him; he has fresh views about the volume challenges with unexpected usage patterns. Designing for performance is tough if the usage pattern is out of the blue, like correlating air humidity on the day of measurement with the presence of some genomic patterns. Building a warehouse just for that might not be the preferred choice, so the problem field is not exhausted. Generally, I’d go for warehousing though.</p> <h3>What technology would you like to have? Network or power efficiency?</h3> <p>OK. Even a fast network is a network. A set of processes on a single shared-memory box is also a kind of network. InfiniBand is maybe half the throughput and 3x the latency of single threaded interprocess communication within one box. The operative word is latency. Making large systems always involves a network or something very much like one in large scale-up scenarios.</p> <p>On the software side, next to nobody understands latency and contention; yet these are the one core factor in any pursuit of scalability.
Because of this situation, paradigms like <a href="http://dbpedia.org/page/MapReduce" id="link-id0x2aab76bea288">MapReduce</a> and <a href="http://dbpedia.org/page/Bulk_synchronous_parallel" id="link-id0x2aab75907c48">bulk synchronous parallel (BSP)</a> processing have become popular because these take the communication out of the program flow, so the programmer cannot muck this up, as otherwise would happen with the inevitability of destiny. Of course, our beloved SQL or declarative query in general does give scalability in many tasks without programmer participation. <a href="http://dbpedia.org/resource/Datalog" id="link-id0x2aab75e63cc8">Datalog</a> has also been used as a means of shipping computation around, as in the work of <a href="http://www.linkedin.com/in/joehellerstein" id="link-id0x2aab75483c38">Hellerstein</a>.</p> <p>There are no easy solutions. We have built scale-out conscious, vectorized extensions to SQL procedures where one can express complex parallel, distributed flows, but people do not use or understand these. These are very useful, even indispensable, but only on the inside, not as a programmer-facing construct. MapReduce and BSP are the limit of what a development culture will absorb. MapReduce and BSP do not hide the fact of distributed processing. What about things that do? Parallel, partitioned extensions to <a href="http://dbpedia.org/resource/Fortran" id="link-id0x2aab76254d78">Fortran</a> <a href="http://dbpedia.org/page/Array_programming" id="link-id0x2aab75db6098">arrays</a>? <a href="http://dbpedia.org/page/Functional_programming" id="link-id0x2aab75ffb7b8">Functional languages</a>? I think that all the obvious aids to parallel/distributed programming have been conceived of. No silver bullet; just hard work. And above all the discernment of what paradigm fits what problem. Since these are always changing, there is no finite set of rules, and no substitute for understanding and insight, and the latter are vanishingly scarce. 
"<a href="http://dissoiblogoi.blogspot.com/2005/11/aristotles-paradigmatism.html" id="link-id0x2aab764f6738">Paradigmatism</a>," i.e., the belief that one particular programming model is a panacea outside of its original niche, is a common source of complexity and inefficiency. This is a common form of enthusiastic naïveté. </p> <p>If you look at power efficiency, the clusters that are the easiest to program consist of relatively few high power machines and a fast network. A typical node size is 16+ cores and 256G or more RAM. Amazon has these in entirely workable configurations, as <a href="http://www.openlinksw.com/weblogs/oerling/?id=1843" id="link-id0x2aab76396be8">documented earlier on this blog</a>. The leading edge in power efficiency is in larger number of smaller units, which makes life again harder. This exacerbates latency and forces one to partition the data more often, whereas one can play with replication of key parts of data more freely if the node size is larger.</p> <p>One very specific item where research might help without having to rebuild the hardware stack would be better, lower-latency exposure of networks to software. Lightweight threads and user-space access, bypassing slow protocol stacks, etc. <a href="https://en.wikipedia.org/wiki/Message_Passing_Interface" id="link-id0x2aab754d0208">MPI</a> has some of this, but maybe more could be done.</p> <p>So, I will take a cluster of such 16-core, 256GB machines on a faster network, over a cluster of 1024 x 4G mobile phones connected via USB. 
Very selfish and unecological, but one has to stay alive and life is tough enough as is.</p> <h3>Are there pressures to adapt business models based on big data?</h3> <p>The transition from <a href="http://dbpedia.org/page/Capital_expenditure" id="link-id0x2aab767114e8">capex</a> to <a href="http://dbpedia.org/page/Operating_expense" id="link-id0x2aab76254b28">opex</a> may be approaching maturity, as there have been workable cloud configurations for the past couple of years. The <a href="http://dbpedia.org/page/Amazon_Elastic_Compute_Cloud" id="link-id0x2aab75235fa8">EC2</a> from way back, with at best a 4 core 16G VM and a horrible network for $2/hr, is long gone. It remains the case that 4 months of 24x7 rent in the cloud equals the purchase price of physical hardware. So, for this to be economical long-term at scale, the average utilization should be about 10% of the peak, and peaks should not be on for more than 10% of the time.</p> <p>So, database software should be rented by the hour. A 100-150% markup for the $2.80 a large EC2 instance costs would be reasonable. Consider that 70% of the cost in TPC benchmarks is database software.</p> <p>There will be different pricing models combining different up-front and per-usage costs, just as there are for clouds now. If the platform business goes that way and the market accepts this, then systems software will follow. Price/performance quotes should probably be expressed as speed/price/hour instead of speed/price.</p> <p>The above is rather uncontroversial but there is no harm restating these facts. Reinforce often.</p> <h3>Well, the question is raised, what should Europe do that would have tangible impact in the next 5 years?</h3> <p>This is a harder question. There is some European business in wide area and mobile infrastructures. Competing against <a href="http://dbpedia.org/resource/Huawei" id="link-id0x2aab76c8a288">Huawei</a> will keep them busy. 
<a href="http://dbpedia.org/page/Intel" id="link-id0x2aab752b4178">Intel</a> and <a href="http://dbpedia.org/page/Mellanox_Technologies" id="link-id0x2aab753a84b8">Mellanox</a> will continue making faster networks regardless of European policies. Intel will continue building denser compute nodes, e.g., integrated Knight’s Corner with dual IB network and 16G fast RAM on chip. Clouds will continue making these available on demand once the technology is in mass production.</p> <p>What’s the next big innovation? <a href="http://dbpedia.org/page/Neuromorphic_engineering" id="link-id0x2aab76156358">Neuromorphic computing</a>? <a href="http://dbpedia.org/page/Quantum_computer" id="link-id0x2aab74efc458">Quantum computing</a>? Maybe. For now, I’d just do more engineering along the core competence discussed above, with emphasis on good marketing and scalable execution. By this I mean trained people who know something about deployment. There is a huge training gap. In the would-be "Age of Data," knowledge of how things actually work and scale is near-absent. I have offered to do some courses on this to partners and public alike, but I need somebody to drive this show; I have other things to do.</p> <p>I have been to many, many project review meetings, mostly as a project partner but also as reviewer. For the past year, the <a href="http://dbpedia.org/resource/European_Commission" id="link-id0x2aab7628cb88">EC</a> has used an innovation questionnaire at the end of the meetings. It is quite vague, and I don’t think it delivers much actionable intelligence.</p> <p>What would deliver this would be a venture capital type activity, with well-developed networks and active participation in developing a business. The EC is not now set up to perform this role, though. But the EC is a fairly large and wealthy entity, so it could invest some money via this type of channel. Also there should be higher individual incentives and rewards for speed and excellence. 
Getting the next Horizon 2020 research grant may be good, but better exists. The grants are competitive enough and the calls are not bad; they follow the times.</p> <p>In the projects I have seen, productization does get some attention, e.g., the <a href="http://stack.lod2.eu/blog/" id="link-id0x2aab768be028">LOD2 stack</a>, but it is not something that is really ongoing or with dedicated commercial backing. It may also be that there is no market to justify such dedicated backing. Much of the RDF work has been "me, too" — let’s do what the real database and data integration people do, but let’s just do this with triples. Innovation? Well, I took the best of the real DB world and adapted this to RDF, which did produce a competent piece of work with broad applicability, extending outside RDF. Is there better than this? Well, some of the data integration work (e.g., <a href="http://svn.aksw.org/papers/2011/WWW_LIMES/public.pdf" id="link-id0x2aab76423568">LIMES</a>) is not bad, and it might be picked up by some of the players that do this sort of thing in the broader world, e.g., <a href="http://dbpedia.org/resource/Informatica" id="link-id0x2aab753c3e68">Informatica</a>, the DI suites of big DB vendors, <a href="https://www.crunchbase.com/organization/tamr" id="link-id0x2aab76fc6e78">Tamr</a>, etc. I would not know if this in fact adds value to the non-RDF equivalents; I do not know the field well enough, but there could be a possibility.</p> <p>The recent emphasis for benchmarking, spearheaded by <a href="https://www.linkedin.com/in/stefanobertolo" id="link-id0x2aab76c45018">Stefano Bertolo</a> is good, as exemplified by the <a href="http://ldbcouncil.org/industry/organization/origins" id="link-id0x2aab75f4e288">LDBC FP7</a>. There should probably be one or two projects of this sort going at all times. 
These make challenges known and are an effective means of guiding research, with a large multiplier: Once a benchmark gets adopted, infinitely more work goes into solving the problem than in stating it in the first place.</p> <p>The aims and calls are good. The execution by projects is variable. For 1% of excellence, there apparently must be 99% of so-and-so, but this is just a fact of life and not specific to this context. The projects are rather diffuse. There is not a single outcome that gets all the effort. In this, the level of engagement of participants is less and focus is much more scattered than in startups. A really hungry, go-getter mood is mostly absent. I am a believer in core competence. Well, most people will agree that core competence is nice. But the projects I have seen do not drive for it hard enough.</p> <p>It is hard to say exactly what kinds of incentives could be offered to encourage truly exceptional work. The American startup scene does offer high rewards and something of this could be transplanted into the EC project world. 
I would not know exactly what form this could take, though.</p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1855" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1855&title=Rethink Big and Europe%E2%80%99s Position in Big Data" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1855&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1855&title=Rethink Big and Europe%E2%80%99s Position in Big Data" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1855">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1855" name="1855" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1855">PermaLink</a> <a href="index.vspx?page=&id=1855&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">06/29/2015 15:36 GMT-0500</span> </td> <td align="right" colspan="3"> </td> </tr> </table> </div> 
</div> </div> <div id="post-1853"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1853"> <div class="post-title"> <span class="dc-title" property="dc:title">Virtuoso updated to version 7.2.1 </span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-29#1853" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-29#1853" dc:title="Virtuoso updated to version 7.2.1 " trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1853"></rdf:Description></rdf:RDF> --> <p>We're pleased to announce that Virtuoso 7.2.1 is now available, and includes various enhancements and bug fixes. Important additions include new support for <code>xsd:boolean</code> and <code>TIMEZONE</code>-less <code>DATETIME</code> &amp; <code>xsd:dateTime</code>; and significantly improved compatibility with the Jena and Sesame Frameworks.</p> <p>New product features as of June 24, 2015, v7.2.1, include:</p> <ul> <li> <p>Virtuoso Engine</p> <ul> <li>Added support for <code>TIMEZONE</code>-less <code>xsd:dateTime</code> and <code>DATETIME</code> </li> <li>Added support for <code>xsd:boolean</code> </li> <li>Added new text index functions </li> <li>Added better handling of HTTP status codes on SPARQL graph protocol endpoint </li> <li>Added new cache for compiled regular expressions </li> <li>Added support for expression in <code>TOP/SKIP</code> </li> </ul> </li> <li> <p>SPARQL</p> <ul> <li>Added support for SPARQL <code>GROUPING SETS</code> </li> <li>Added support for SPARQL 1.1 <code>EBV</code> (Effective Boolean Value) </li> <li>Added support for <code>define input:with-fallback-graph-uri</code> </li> <li>Added support 
for <code>define input:target-fallback-graph-uri</code> </li> </ul> </li> <li> <p>Jena &amp; Sesame Compatibility</p> <ul> <li>Added support for using <code>rdf_insert_triple_c()</code> to insert BNode data </li> <li>Added support for returning <code>xsd:boolean</code> as <code>true/false</code> rather than <code>1/0</code> </li> <li>Added support for <code>maxQueryTimeout</code> in Sesame2 provider </li> </ul> </li> <li> <p>JDBC Driver</p> <ul> <li>Added new methods <code>setLogFileName</code> and <code>getLogFileName</code> </li> <li>Added new attribute "<code>logFileName</code>" to <code>VirtuosoDataSources</code> for logging support </li> </ul> </li> <li> <p>Faceted Browser</p> <ul> <li>Added support for emitting HTML5+Microdata instead of RDFa as default HTML page </li> <li>Added query optimizations </li> <li>Added new footer icons to /describe page </li> </ul> </li> <li> <p>Conductor and DAV</p> <ul> <li>Added support for VAD dependency tree </li> <li>Added support for default vdirs when creating new listeners </li> <li>Added support for private RDF graphs </li> <li>Added support for LDP in DAV API </li> <li>Added option to create shared folder if not present </li> <li>Added option to enable/disable DET graphs binding </li> <li>Added option to set content length threshold for asynchronous sponging </li> <li>Added folder option related to <code>.TTL</code> redirection </li> <li>Added functions to edit turtle files </li> <li>Added popup dialog to search for unknown prefixes </li> <li>Added registry option to add missing prefixes for <code>.TTL</code> files </li> </ul> </li> </ul> More details of the additions, fixes, and other changes in this update of both Open Source and Commercial Editions, may be found on <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VOSNews#2015-06-24%20-%20Virtuoso%20Open-Source%20Edition%207.2.1%20Released" id="link-id0x2aab74ec1248">the Virtuoso News page</a>. 
Additional Information: <ul> <li> <p>Virtuoso Commercial Edition</p> <ul> <li> <a href="http://virtuoso.openlinksw.com/" id="link-id0x2aab75e9ead8">Home Page</a> </li> <li> <a href="http://virtuoso.openlinksw.com/download/" id="link-id0x2aab75e9ebe8">Download Page</a> </li> </ul> </li> <li> <p>Virtuoso Open Source Edition </p> <ul> <li> <a href="https://github.com/openlink/virtuoso-opensource/tree/develop/7" id="link-id0x2aab75e9eef8">Development Branch</a> </li> <li> <a href="https://github.com/openlink/virtuoso-opensource/tree/stable/7" id="link-id0x2aab75e9f048">Stable Branch</a> </li> </ul> </li> </ul> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1853" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1853&title=Virtuoso updated to version 7.2.1 " class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1853&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1853&title=Virtuoso updated to version 7.2.1 " class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1853">Tweet</a> <script type="text/javascript" 
src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1853" name="1853" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1853">PermaLink</a> <a href="index.vspx?page=&id=1853&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">06/29/2015 15:21 GMT-0500</span> </td> <td align="right" colspan="3"> </td> </tr> </table> </div> </div> </div> <div id="post-1850"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1850"> <div class="post-title"> <span class="dc-title" property="dc:title">Virtuoso Elastic Cluster Benchmarks AMI on Amazon EC2</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-16#1850" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-16#1850" dc:title="Virtuoso Elastic Cluster Benchmarks AMI on Amazon EC2" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1850"></rdf:Description></rdf:RDF> --> <p>We have another new Amazon machine image, this time for deploying your own Virtuoso Elastic Cluster on the cloud. The <a href="http://www.openlinksw.com/weblog/oerling/?id=1845" id="link-id0x10cd1dc38">previous post</a> gave a summary of running TPC-H on this image. This post is about what the AMI consists of and how to set it up.</p> <p> <i><b>Note:</b> This AMI is running a pre-release build of Virtuoso 7.5, Commercial Edition. 
Features are subject to change, and this build is not licensed for any use other than the AMI-based benchmarking described herein.</i> </p> <p>There are two preconfigured cluster setups; one is for two (2) machines/instances and one is for four (4). Generation and loading of TPC-H data, as well as the benchmark run itself, is preconfigured, so you can do it by entering just a few commands. The whole sequence of doing a terabyte (1000G) scale TPC-H takes under two hours, with 30 minutes to generate the data, 35 minutes to load, and 35 minutes to do three benchmark runs. The 100G scale is several times faster still.</p> <p>To experiment with this AMI, you will need a set of license files, one per machine/instance, which <a href="http://www.openlinksw.com/contact/" id="link-id0x10ca15da8">our Sales Team can provide</a>.</p> <p>Detailed instructions are on the AMI, in <code>/home/ec2-user/<a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150616ElasticClusterSetup/cluster_instructions.txt" title="cluster_instructions.txt" alt="Detailed Cluster Setup Instructions" id="link-id0x2aab90d25058">cluster_instructions.txt</a></code>, but the basic steps to get up and running are as follows:</p> <ol> <li> <p>Instantiate machine image <b>ami-811becea</b> (AMI ID is subject to change; you should be able to find the latest by searching for "OpenLink Virtuoso Benchmarks" in "Community AMIs"; this one is short-named <code>virtuoso-bench-cl</code>) with two or four (2 or 4) R3.8xlarge instances within one virtual private cloud and placement group. 
Make sure the VPC security is set to allow all connections.</p> </li> <li> <p>Log in to the first, and fill in the configuration file with the internal IP addresses of all machines instantiated in step 1.</p> </li> <li> <p>Distribute the license files to the instances, and start the OpenLink License Manager on each machine.</p> </li> <li> <p>Run 3 shell commands to set up the file systems and the Virtuoso configuration files.</p> </li> <li> <p>If you do not plan to run one of these benchmarks, you can simply start and work with the Virtuoso cluster now. It is ready for use with an empty database.</p> </li> <li> <p>Before running one of these benchmarks, generate the appropriate dataset with the <code>dbgen.sh</code> command.</p> </li> <li> <p>Bulk load the data with <code>load.sh</code>.</p> </li> <li> <p>Run the benchmark with <code>run.sh</code>.</p> </li> </ol> <p>Right now the cluster benchmarks are limited to TPC-H but cluster versions of the LDBC Social Network and Semantic Publishing benchmarks will follow soon.</p> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1850" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1850&title=Virtuoso Elastic Cluster Benchmarks AMI on Amazon EC2" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1850&phase=2" class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a 
href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1850&title=Virtuoso Elastic Cluster Benchmarks AMI on Amazon EC2" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1850">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1850" name="1850" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1850">PermaLink</a> <a href="index.vspx?page=&id=1850&cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">06/16/2015 17:53 GMT-0500</span> </td> <td align="right" colspan="3"> Modified: <span property="dct:modified"><span class="modified-date">06/17/2015 10:13 GMT-0500</span> </span> </td> </tr> </table> </div> </div> </div> <div id="post-1846"> <div class="message" typeof="sioct:BlogPost" about="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1846"> <div class="post-title"> <span class="dc-title" property="dc:title">In Hoc Signo Vinces (part 21 of n): Running TPC-H on Virtuoso Elastic Cluster on Amazon EC2</span> </div> <div class="post-content" property="sioc:content"> <!-- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"><rdf:Description rdf:about="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-10#1846" dc:identifer="http://virtuoso.openlinksw.com/blog/vdb/blog/?date=2015-06-10#1846" dc:title="In Hoc Signo Vinces (part 21 of n): Running TPC-H on Virtuoso Elastic Cluster on Amazon 
EC2" trackback:ping="http://virtuoso.openlinksw.com/mt-tb/Http/trackback?id=1846"></rdf:Description></rdf:RDF> --> <p>We have made an Amazon EC2 deployment of <a href="http://virtuoso.openlinksw.com/features-comparison-matrix/#cluster" id="link-id0x2aabd14557d8">Virtuoso 7 Commercial Edition, configured to use the Elastic Cluster Module</a> with TPC-H preconfigured, similar to the <a href="http://www.openlinksw.com/weblogs/oerling/?id=1843" id="link-id0x2aabd235a378">recently published OpenLink Virtuoso Benchmark AMI</a> running the Open Source Edition. The details of the new Elastic Cluster AMI and steps to use it will be published in a forthcoming post. Here we will simply look at results of running TPC-H 100G scale on two machines, and 1000G scale on four machines. This shows how Virtuoso provides great performance on a cloud platform. The extremely fast bulk load — 33 minutes for a terabyte! — means that you can get straight to work even with on-demand infrastructure.</p> <p>In the following, the Amazon instance type is R3.8xlarge, each with dual Xeon E5-2670 v2, 244G RAM, and 2 x 300G SSD. The image is made from the Amazon Linux with built-in network optimization. We first tried a RedHat image without network optimization and had considerable trouble with the interconnect. Using network-optimized Amazon Linux images inside a virtual private cloud has resolved all these problems.</p> <p>The network optimized 10GE interconnect at Amazon offers throughput close to the QDR InfiniBand running TCP-IP; thus the Amazon platform is suitable for running cluster databases. 
The execution that we have seen is not seriously network bound.</p> <h3>100G on 2 machines, with a total of 32 cores, 64 threads, 488 GB RAM, 4 x 300 GB SSD</h3> <b>Load time:</b> 3m 52s <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Run</th> <th>Power</th> <th>Throughput</th> <th>Composite</th> </tr> <tr> <th style="text-align:center;"><code>1</code> </th> <td style="text-align:right;"><code>523,554.3</code> </td> <td style="text-align:right;"><code>590,692.6</code> </td> <td style="text-align:right;"><code>556,111.2</code> </td> </tr> <tr> <th style="text-align:center;"><code>2</code> </th> <td style="text-align:right;"><code>565,353.3</code> </td> <td style="text-align:right;"><code>642,503.0</code> </td> <td style="text-align:right;"><code>602,694.9</code> </td> </tr> </table> <h3>1000G on 4 machines, with a total of 64 cores, 128 threads, 976 GB RAM, 8 x 300 GB SSD</h3> <b>Load time:</b> 32m 47s <table style="padding:10px;border-spacing:10px;margin-left:auto;margin-right:auto;"> <tr style="text-align:center;"> <th>Run</th> <th>Power</th> <th>Throughput</th> <th>Composite</th> </tr> <tr> <th style="text-align:center;"><code>1</code> </th> <td style="text-align:right;"><code>592,013.9</code> </td> <td style="text-align:right;"><code>754,107.6</code> </td> <td style="text-align:right;"><code>668,163.3</code> </td> </tr> <tr> <th style="text-align:center;"><code>2</code> </th> <td style="text-align:right;"><code>896,564.1</code> </td> <td style="text-align:right;"><code>828,265.4</code> </td> <td style="text-align:right;"><code>861,738.4</code> </td> </tr> <tr> <th style="text-align:center;"><code>3</code> </th> <td style="text-align:right;"><code>883,736.9</code> </td> <td style="text-align:right;"><code>829,609.0</code> </td> <td style="text-align:right;"><code>856,245.3</code> </td> </tr> </table> <p>For the larger scale we did 3 sets of power + throughput tests to measure consistency of 
performance. By the TPC-H rules, the worst (first) score should be reported. Even after bulk load, this is markedly less than the next power score due to working set effects. This is seen to a lesser degree with the first throughput score also.</p> <p>The numerical quantity summaries are available <a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150610TPCHonClusterAMI/report.zip" id="link-id0x2aabafe59058">in a report.zip file</a>, or individually --</p> <ul> <li> <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150610TPCHonClusterAMI/report-100-1.txt" style="wikiautogen" id="link-id0x2aac0d449ff8">report-100-1.txt</a> </code> </li> <li> <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150610TPCHonClusterAMI/report-100-2.txt" style="wikiautogen" id="link-id0x2aab628ae658">report-100-2.txt</a> </code> </li> <li> <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150610TPCHonClusterAMI/report-1000-1.txt" style="wikiautogen" id="link-id0x2aab637b1078">report-1000-1.txt</a> </code> </li> <li> <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150610TPCHonClusterAMI/report-1000-2.txt" style="wikiautogen" id="link-id0x2aac0db30728">report-1000-2.txt</a> </code> </li> <li> <code><a href="http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/BlogFiles20150610TPCHonClusterAMI/report-1000-3.txt" style="wikiautogen" id="link-id0x2aac0d462b08">report-1000-3.txt</a> </code> </li> </ul> <p>Subsequent posts will explain how to deploy Virtuoso Elastic Clusters on AWS.</p> <h3> <i>In Hoc Signo Vinces</i> (TPC-H) Series</h3> <ul> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1739" id="link-id0x2aab52abc0b8"> In Hoc Signo Vinces (part 1): Virtuoso meets TPC-H</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1741" id="link-id0x2aab53935178"> In Hoc Signo Vinces (part 2): TPC-H 
Schema Choices</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1742" id="link-id0x2aab537ea348"> In Hoc Signo Vinces (part 3): Benchmark Configuration Settings</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1744" id="link-id0x2aab516a28b8"> In Hoc Signo Vinces (part 4): Bulk Load and Refresh</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1747" id="link-id0x2aab514b53d8"> In Hoc Signo Vinces (part 5): The Return of SQL Federation</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1753" id="link-id0x2aaba8e12aa8"> In Hoc Signo Vinces (part 6): TPC-H Q1 and Q3: An Introduction to Query Plans</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1755" id="link-id0x2aab537ea308"> In Hoc Signo Vinces (part 7): TPC-H Q13: The Good and the Bad Plans</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1756" id="link-id0x2aaba97e18d8"> In Hoc Signo Vinces (part 8): TPC-H: INs, Expressions, ORs</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1779" id="link-id0x2aab52a6add8"> In Hoc Signo Vinces (part 9): TPC-H Q18, Ordered Aggregation, and Top K</a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1789" id="link-id0x2aaba9725b78"> In Hoc Signo Vinces (part 10): TPC-H Q9, Q17, Q20 - Predicate Games </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1793" id="link-id0x2aab51109e68"> In Hoc Signo Vinces (part 11): TPC-H Q2, Q10 - Late Projection </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1796" id="link-id0x2aaba97e1898"> In Hoc Signo Vinces (part 12): TPC-H: Result Preview </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1798" id="link-id0x2aab51a706a8"> In Hoc Signo Vinces (part 13): Virtuoso TPC-H Kit Now on V7 Fast Track </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1800" id="link-id0x2aaba97e18b8"> In Hoc Signo Vinces (part 14): 
Virtuoso TPC-H Implementation Analysis </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1802" id="link-id0x2aaba9725b98"> In Hoc Signo Vinces (part 15): TPC-H and the Science of Hash </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1816" id="link-id0x2aab52abc098"> In Hoc Signo Vinces (part 16): Introduction to Scale-Out </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1818" id="link-id0x2aab5130e068"> In Hoc Signo Vinces (part 17): 100G and 300G Runs on Dual Xeon E5 2650v2 </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1819" id="link-id0x2aab52979f88"> In Hoc Signo Vinces (part 18): Cluster Dynamics </a> </li> <li> <a href="http://www.openlinksw.com/weblog/oerling/?id=1822" id="link-id0x2aab53015408"> In Hoc Signo Vinces (part 19): Scalability, 1000G, and 3000G </a> </li> <li> <!-- NOTE(review): this href repeats part 19's (?id=1822); confirm the post id for part 20 --> <a href="http://www.openlinksw.com/weblog/oerling/?id=1822" id="link-id0x2aab53015408-part20"> In Hoc Signo Vinces (part 20): 100G and 1000G With Cluster; When is Cluster Worthwhile; Effects of I/O </a> </li> <li> In Hoc Signo Vinces (part 21): Running TPC-H on Virtuoso Cluster on Amazon EC2<i> (this post)</i> </li> </ul> </div> <div class="spread_links"> <a href="http://technorati.com/cosmos/search.html?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1846" class="spread_link"><img src="/weblog/public/images/technorati.gif" alt="Find related stories via Technorati" title="Find related stories via Technorati" border="0" hspace="1" />related</a> <a href="http://del.icio.us/post?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1846&amp;title=In%20Hoc%20Signo%20Vinces%20%28part%2021%20of%20n%29%3A%20Running%20TPC-H%20on%20Virtuoso%20Elastic%20Cluster%20on%20Amazon%20EC2" class="spread_link"><img src="/weblog/public/images/delicious.gif" alt="Post to del.icio.us" title="Post to del.icio.us" border="0" hspace="1" />bookmark it!</a> <a href="http://www.digg.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1846&amp;phase=2" 
class="spread_link"><img src="/weblog/public/images/digman.gif" alt="submit digg.com" title="submit digg.com" border="0" hspace="1" />digg it!</a> <a href="http://reddit.com/submit?url=http%3A%2F%2Fvirtuoso.openlinksw.com%2Fblog%2F%3Fid%3D1846&amp;title=In%20Hoc%20Signo%20Vinces%20%28part%2021%20of%20n%29%3A%20Running%20TPC-H%20on%20Virtuoso%20Elastic%20Cluster%20on%20Amazon%20EC2" class="spread_link"><img src="/weblog/public/images/reddithead.png" alt="post reddit" title="post reddit" border="0" hspace="1" />reddit!</a> <a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal" data-url="http://virtuoso.openlinksw.com/blog/?id=1846">Tweet</a> <script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script> </div> <div class="post-actions"> <a id="post_anchor1846" name="1846" class="noapp">#</a> <a href="http://www.openlinksw.com/dataspace/vdb/weblog/vdb%27s%20BLOG%20%5B136%5D/1846">PermaLink</a> <a href="index.vspx?page=&amp;id=1846&amp;cmf=1#comments">Comments [0]</a> </div> <div class="pubdate"> <table cellpadding="0" cellspacing="0" width="100%"> <tr> <td> <span class="dc-date" property="dct:created">06/10/2015 12:04 GMT-0500</span> </td> <td align="right" colspan="3"> Modified: <span property="dct:modified"><span class="modified-date">06/10/2015 12:49 GMT-0500</span> </span> </td> </tr> </table> </div> </div> </div> <div>  <a href="#">&lt;&lt;</a> <input type="hidden" name="posts_offs" value="0" /> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 0; doPost ('page_form', 'posts_idx'); return false"><b>1</b></a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 1; doPost ('page_form', 'posts_idx'); return false">2</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 2; doPost ('page_form', 'posts_idx'); return false">3</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 3; doPost ('page_form', 'posts_idx'); return false">4</a> | <a 
href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 4; doPost ('page_form', 'posts_idx'); return false">5</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 5; doPost ('page_form', 'posts_idx'); return false">6</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 6; doPost ('page_form', 'posts_idx'); return false">7</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 7; doPost ('page_form', 'posts_idx'); return false">8</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 8; doPost ('page_form', 'posts_idx'); return false">9</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 9; doPost ('page_form', 'posts_idx'); return false">10</a> | <a href="#" onclick="javascript: document.forms['page_form'].posts_offs.value = 10; doPost ('page_form', 'posts_idx'); return false">11</a> | <a href="javascript:void(0)" onclick="javascript: doPost ('page_form', 'posts_next'); return false" title="Next" >&gt;&gt;</a> </div> </td> <td class="box" id="right"> <div class="box"> <table id="calendar"> <tr> <td colspan="7"> <select name="pmon1" class="select" onchange="doAutoSubmit (this.form, this)"><option value="1" selected="selected">January</option><option value="2" >February</option><option value="3" >March</option><option value="4" >April</option><option value="5" >May</option><option value="6" >June</option><option value="7" >July</option><option value="8" >August</option><option value="9" >September</option><option value="10" >October</option><option value="11" >November</option><option value="12" >December</option></select> <select name="pyear" class="select" onchange="doAutoSubmit (this.form, this)"><option value="2003" >2003</option><option value="2004" >2004</option><option value="2005" >2005</option><option value="2006" >2006</option><option value="2007" >2007</option><option value="2008" 
>2008</option><option value="2009" >2009</option><option value="2010" >2010</option><option value="2011" >2011</option><option value="2012" >2012</option><option value="2013" >2013</option><option value="2014" >2014</option><option value="2015" >2015</option><option value="2016" selected="selected">2016</option></select> </td> </tr> <tr><th>Sun</th><th>Mon</th><th>Tue</th><th>Wed</th><th>Thu</th><th>Fri</th><th>Sat</th> </tr> <tr><td> </td><td> </td><td> </td><td> </td><td> </td> <td class="calnotactive"> <a href="javascript: void(0)">1 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">2 </a> </td> </tr> <tr> <td class="calnotactive"> <a href="javascript: void(0)">3 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">4 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">5 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">6 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">7 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">8 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">9 </a> </td> </tr> <tr> <td class="calnotactive"> <a href="javascript: void(0)">10 </a> </td> <td class="calactive"> <a href="index.vspx?date=2016-01-11">11 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">12 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">13 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">14 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">15 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">16 </a> </td> </tr> <tr> <td class="calnotactive"> <a href="javascript: void(0)">17 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">18 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">19 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">20 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">21 </a> </td> <td 
class="calnotactive"> <a href="javascript: void(0)">22 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">23 </a> </td> </tr> <tr> <td class="calnotactive"> <a href="javascript: void(0)">24 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">25 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">26 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">27 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">28 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">29 </a> </td> <td class="calnotactive"> <a href="javascript: void(0)">30 </a> </td> </tr> <tr> <td class="calnotactive"> <a href="javascript: void(0)">31 </a> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td> </tr> <tr> <td><a href="index.vspx?date=2015-08-00" class="real_button" title="Previous">&lt;</a> </td> <td colspan="5" align="center"><a class="button" href="index.vspx?page=&amp;date=2024-12-03">today</a> </td> <td> </td> </tr> </table> <br /> <div class="roll"> <h2>Blog Roll<a href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=6" class="opml-link"><img border="0" alt="OPML" title="OPML" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OPML</a><a href="http://virtuoso.openlinksw.com/blog/gems/ocs.xml?:c=6" class="ocs-link"><img border="0" alt="OCS" title="OCS" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OCS</a> </h2> <div> <a href="http://www.openlinksw.com/blog/~kidehen/gems/atom.xml"><img src="/weblog/public/images/atom-icon-16.gif" border="0" alt="ATOM" title="ATOM" /> </a> <a href="http://www.openlinksw.com/weblog/kidehen@openlinksw.com/127/" rel="">Kingsley Idehen's Weblog </a> </div> <div> <a href="http://www.openlinksw.com/weblogs/oerling/gems/rss.xml"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://www.openlinksw.com/weblogs/oerling/" rel="contact met co-worker">Orri Erling's Weblog </a> </div> </div> <div 
class="roll"> <h2>Documentation (Atom Feed)<a href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=25" class="opml-link"><img border="0" alt="OPML" title="OPML" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OPML</a><a href="http://virtuoso.openlinksw.com/blog/gems/ocs.xml?:c=25" class="ocs-link"><img border="0" alt="OCS" title="OCS" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OCS</a> </h2> <div> <a href="http://docs.openlinksw.com/virtuoso/rdfandsparql.xml"><img src="/weblog/public/images/rdf-icon-16.gif" border="0" alt="RDF" title="RDF" /> </a> <a href="http://docs.openlinksw.com/virtuoso/rdfandsparql.html" rel="">Chapter: RDF Database and SPARQL </a> </div> </div> <div class="roll"> <h2>Documentation (RDF Feed)<a href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=24" class="opml-link"><img border="0" alt="OPML" title="OPML" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OPML</a><a href="http://virtuoso.openlinksw.com/blog/gems/ocs.xml?:c=24" class="ocs-link"><img border="0" alt="OCS" title="OCS" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OCS</a> </h2> <div> <a href="http://docs.openlinksw.com/virtuoso/rdfandsparql.rdf"><img src="/weblog/public/images/rdf-icon-16.gif" border="0" alt="RDF" title="RDF" /> </a> <a href="http://docs.openlinksw.com/virtuoso/rdfandsparql.html" rel="">Chapter: RDF Database and SPARQL </a> </div> </div> <div class="roll"> <h2>Online Demos &amp; Tutorials<a href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=17" class="opml-link"><img border="0" alt="OPML" title="OPML" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OPML</a><a href="http://virtuoso.openlinksw.com/blog/gems/ocs.xml?:c=17" class="ocs-link"><img border="0" alt="OCS" title="OCS" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OCS</a> </h2> <div> <a href="http://demo.openlinksw.com/tutorial/apps/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" 
/> </a> <a href="http://demo.openlinksw.com/tutorial/apps/index.vsp" rel="">Demo Applications </a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/nntp/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/intprot/" rel="">Internet Protocols (NNTP) </a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/smtp/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/intprot/" rel="">Internet Protocols (SMTP &amp; POP3) </a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/repl/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/repl/index.vsp" rel="">Replication </a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/hosting/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/hosting/index.vsp" rel="">Runtime Hosting (.NET CLR, Mono, ASP.NET, Java, PHP, Perl, Python) </a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/web/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/web/index.vsp" rel="">Web Application programming </a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/services/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/services/index.vsp" rel="">Web Services (SOAP,WSDL,UDDI, WS-Security, WS-Routing etc.) 
</a> </div> <div> <a href="http://demo.openlinksw.com/tutorial/xml/rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://demo.openlinksw.com/tutorial/xml/index.vsp" rel="">XML Services (XSL-T, SQLX, XPath, XQuery, and XML Schema) </a> </div> </div> <div class="roll"> <h2>Online Documentation<a href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=16" class="opml-link"><img border="0" alt="OPML" title="OPML" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OPML</a><a href="http://virtuoso.openlinksw.com/blog/gems/ocs.xml?:c=16" class="ocs-link"><img border="0" alt="OCS" title="OCS" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OCS</a> </h2> <div> <a href="http://docs.openlinksw.com/virtuoso/appendixa.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/appendixa.html" rel="">Appendix </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/concepts.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/concepts.html" rel="">Conceptual Overview </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/accessinterfaces.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/accessinterfaces.html" rel="">Data Access Interfaces </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/repl.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/repl.html" rel="">Data Replication, Synchronization and Transformation Services </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/hooks.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a 
href="http://docs.openlinksw.com/virtuoso/hooks.html" rel="">Database Event Hooks </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/freetext.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/freetext.html" rel="">Free Text Search </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/installation.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/installation.html" rel="">Installation Guide </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/internetservices.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/internetservices.html" rel="">Internet Services </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/overview.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/overview.html" rel="">Overview </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/quicktours.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/quicktours.html" rel="">Quick Start &amp; Tours </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/runtimehosting.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/runtimehosting.html" rel="">Runtime Hosting </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/sampleapps.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/sampleapps.html" rel="">Sample ODBC &amp; JDBC Applications </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/server.rss"><img 
src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/server.html" rel="">Server Administration </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/sqlprocedures.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/sqlprocedures.html" rel="">SQL Procedure Language Guide </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/sqlreference.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/sqlreference.html" rel="">SQL Reference </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/tpcc.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/tpcc.html" rel="">TPC C Benchmark Kit </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/functions.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/functions.html" rel="">Virtuoso Functions Guide </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/adminui.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/adminui.html" rel="">Visual Server Administration Interface </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/webappdevelopment.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/webappdevelopment.html" rel="">Web Application Development </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/webservices.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/webservices.html" 
rel="">Web Services </a> </div> <div> <a href="http://docs.openlinksw.com/virtuoso/webandxml.rss"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://docs.openlinksw.com/virtuoso/webandxml.html" rel="">XML Services (XSL-T, XPath, XQuery, XML Schema) </a> </div> </div> <div class="roll"> <h2>Support<a href="http://virtuoso.openlinksw.com/blog/gems/opml.xml?:c=18" class="opml-link"><img border="0" alt="OPML" title="OPML" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OPML</a><a href="http://virtuoso.openlinksw.com/blog/gems/ocs.xml?:c=18" class="ocs-link"><img border="0" alt="OCS" title="OCS" src="/weblog/public/images/blue-icon-16.gif" hspace="3" /> OCS</a> </h2> <div> <a href="http://demo.openlinksw.com/viewlets/virtuoso_viewlets_rss.vsp"><img src="/weblog/public/images/rss-icon-16.gif" border="0" alt="RSS" title="RSS" /> </a> <a href="http://support.openlinksw.com/viewlets/virtuoso_viewlets_rss.vsp" rel="">Viewlets for OpenLink Virtuoso Universal Server </a> </div> </div> </div> <div class="box"> </div> </td> </tr> <tr> <td colspan="3"> <div id="powered"> <a href="http://www.openlinksw.com/virtuoso/"><img alt="Powered by OpenLink Virtuoso Universal Server" border="0" src="/weblog/public/images/PoweredByVirtuoso.gif" /> </a><br/>Running on Linux platform<br/></div> <div id="copy">OpenLink Software 1998-2006</div> <div id="disclaimer"></div> </td> </tr> </table> </form> </body> </html>