CINXE.COM
ggg - Generative Grammar Gadget
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>ggg - Generative Grammar Gadget</title>
<style>
h2 {color:#A60000;}
h3 {color:#C08700;}
h4 {color:#C08700;}
h5 {color:#C08700;}
h6 {color:#C08700;}
tt {color:#A60000; font-weight:bold; font-family:"Gentium";}
</style>
</head>
<body bgcolor="#FFFFFF" text="#000000">
<form action="" name="theform">
<script>
// Script (C) 2018 by Mark Rosenfelder.

// ---- Shared state -------------------------------------------------------
var theform;          // the page's <form>; assigned when the user acts
var rulez;            // expanded rule strings, interleaved with ordering markers
var ruleCats;         // category (left-hand side) of each expanded rule
var letters = true;   // true: every character is a symbol; false: symbols are space-separated words
var delib = false;    // true: show debugging output
var morph;            // morphological rule lines (those beginning with µ)
var morphlist;        // tokens of the first µ line, i.e. the key to the affixes
var optcat = true;    // true: ( ) in a substitution marks optional symbols

// Random integer r with 0 <= r < max
function randomInt(max)
{
  return Math.floor(Math.random() * max);
}

// Does t contain any symbol sequence that is the category of some rule?
//   t  the string being derived
// Category tokens that begin with * are wildcards and match any single
// token, mirroring canapply(). Uses a local token array so the globals
// shared by canapply()/apply() are left alone.
function hasNonterms(t)
{
  var toks = tokenize(t);

  // Tokenize every category once rather than once per position
  var cats = new Array();
  for (var c = 0; c < ruleCats.length; c++)
    cats.push(tokenize(ruleCats[c]));

  for (var pos = 0; pos < toks.length; pos++) {
    for (var r = 0; r < cats.length; r++) {
      var catz = cats[r];
      var matches = true;
      for (var j = 0; matches && j < catz.length; j++) {
        if (pos + j >= toks.length)
          matches = false;                      // category runs off the end of t
        else if (catz[j].charAt(0) != "*")
          matches = toks[pos + j] == catz[j];   // literal symbol must be equal
        // else: wildcard, matches whatever token is there
      }
      if (matches)
        return true;
    }
  }
  return false;
}

// Given a normalized rule S=AB=E_ (the form S=AB/E_ takes once its / has
// been turned into =), return the environment (here E_).
// Returns "" unless the rule has exactly three =-separated parts.
function rule2env(r)
{
  var parse = r.split("=");
  if (parse.length == 3)
    return parse[2];
  else
    return "";
}

// Given a rule S=AB, return the category (S); "" if there is no =
function rule2cat(r)
{
  var parse = r.split("=");
  if (parse.length >= 2)
    return parse[0];
  else
    return "";
}

// Given a rule S=AB, return the substitution string (AB); "" if there is no =
function rule2sub(r)
{
  var parse = r.split("=");
  if (parse.length >= 2)
    return parse[1];
  else
    return "";
}

// Split a string into tokens
// e.g.
//   AB -> A | B          (letters mode)
//   NP N S -> NP | N | S (word mode)
// TODO special handling for parens, act as if they have a space around them
function tokenize(r)
{
  if (letters) {
    return r.split("");
  }
  else
    return r.split(" ");
}

// ---- Scratch state shared by canapply(), apply() and the morphology code ----
var appAt = 0;   // token index at which canapply() last tried / found a match
var tokenz;      // tokens of the string most recently examined
var rtokenz;     // tokens of the rule category (or µ rule) most recently examined
var star;        // token captured by the most recent * wildcard

// Append token t to u, followed by a space in word mode.
// An empty token adds nothing. Returns the extended string.
function addToken(u, t)
{
  if (t != "") {
    u += t;
    if (!letters) u += " ";
  }
  return u;
}

// Apply rule r to string t and return the result.
// Only the first place where the rule's category matches is rewritten;
// if the rule cannot apply, t is returned unchanged.
function apply(t, r)
{
  // Use canapply to tokenize and find where the rule category lives.
  if (!canapply(t, r))
    return t;

  var u = "";
  for (var i = 0; i < tokenz.length; i++) {
    if (i == appAt) {
      // Cat(s) found here
      var target = rule2sub(r);
      if (optcat && target.indexOf('(') != -1) {
        // Rule has optional elements in parens. Choose some randomly.
        var targetz = tokenize(target);
        var skip = false;
        for (var p = 0; p < targetz.length; p++) {
          if (targetz[p] == "(")
            skip = randomInt(2) == 0;
          else if (targetz[p] == ")")
            skip = false;
          else if (!skip)
            u = addToken(u, targetz[p]);
        }
      }
      else if (target.indexOf('*') != -1) {
        // Rule has a wildcard; need to go token by token
        var stokenz = tokenize(target);
        for (var q = 0; q < stokenz.length; q++) {
          if (stokenz[q].charAt(0) == "*") {
            u = addToken(u, star);
            // Nothing was captured, so the token under the wildcard was
            // not consumed: step back so it is copied on a later pass.
            if (star == "") i -= 1;
          }
          else
            u = addToken(u, stokenz[q]);
        }
      }
      else if (target != "ø") {
        // ø alone means "rewrite to nothing"
        u = addToken(u, target);
      }
      // Skip the rest of the tokens the category matched
      i += rtokenz.length - 1;
    }
    else {
      u = addToken(u, tokenz[i]);
    }
  }
  return u;
}

// Tell if the category in rule r *can* apply to string t.
// Side effects: sets tokenz, rtokenz, appAt (where the match starts) and,
// for wildcard categories, star.
function canapply(t, r)
{
  tokenz = tokenize(t);
  rtokenz = tokenize(rule2cat(r));

  var env = rule2env(r);
  if (env != "") {
    // An environment that survives to here is tested simply by asking
    // whether it is present anywhere as a token
    var present = false;
    for (var k = 0; k < tokenz.length; k++) {
      if (tokenz[k] == env) present = true;
    }
    if (!present)
      return false;
  }

  for (var i = 0; i < tokenz.length; i++) {
    var match = true;
    appAt = i;
    for (var j = 0; match && j < rtokenz.length; j++) {
      if (i + j < tokenz.length) {
        if (rtokenz[j] == "*") {
          // Simple * matches anything
          star = tokenz[i+j];
          match = true;
        }
        else if (rtokenz[j].charAt(0) == '*') {
          // *V always matches, but star is retained only if it's not a V
          var notThis = rtokenz[j].substring(1);
          if (notThis != tokenz[i+j])
            star = tokenz[i+j];
          else
            star = "";
        }
        else
          match = rtokenz[j] == tokenz[i+j];
      }
      else
        match = false;
    }
    if (match)
      return true;
  }
  return false;
}

// Does the found list include this output?
function isFound(found, s)
{
  for (var i = 0; i < found.length; i++) {
    if (found[i] == s)
      return true;
  }
  return false;
}

// Get the nth string in this morph rule (tokenized in rtokenz).
// If the rule has no nth form, fall back to rtokenz[1].
function GetMorph(n)
{
  if (n < rtokenz.length)
    return rtokenz[n];
  else
    return rtokenz[1];
}

// Is this string in the list of affixes?
// If so, return its index in morphlist.
// If not, return -1.
function AffixN(tt)
{
  if (morph.length > 1 && morphlist.length > 1) {
    for (var i = 0; i < morphlist.length; i++)
      if (tt == morphlist[i])
        return i;
  }
  return -1;
}

// Apply a morphological rule
// e.g  µ take takes took taken taking
// means "VPL take" > takes, "en take" > taken, etc
//   t  the string to inflect
//   r  one µ rule line
// Returns the rewritten string (tokens joined by spaces), or t unchanged
// if the rule is too short or nothing matched.
function DoMorph(t, r)
{
  tokenz = tokenize(t);
  rtokenz = tokenize(r);
  if (rtokenz.length < 3)
    return t;

  var v = rtokenz[1];   // the base form this rule inflects
  var changed = false;
  // -1 = "previous token was not an affix". Must not start as "", which
  // compares unequal to -1 and would rewrite a leading base form.
  var affix = -1;
  for (var i = 0; i < tokenz.length; i++) {
    var tt = tokenz[i];
    var x = AffixN(tt);
    if (x != -1)
      affix = x;
    else {
      if (affix != -1 && tt == v) {
        tokenz[i-1] = "";             // drop the affix token
        tokenz[i] = GetMorph(affix);  // and inflect the base form
        changed = true;
      }
      affix = -1;
    }
  }

  if (changed) {
    var u = "";
    for (var m = 0; m < tokenz.length; m++) {
      if (tokenz[m] != "") {
        u += tokenz[m];
        if (m+1 < tokenz.length)
          u += " ";
      }
    }
    return u;
  }
  else
    return t;
}

// Rule ordering: divide rules into ordering groups with +.
// We go past each + when no rules before it apply.

// User hit the action button. Make things happen!
// Read the form, expand the production rules, derive up to 50 strings
// from "S" (one when debugging) and write them to the output area.
function process()
{
  // Read parameters
  theform = document.theform;
  letters = theform.nts[0].checked;
  delib = theform.debug.checked;
  optcat = theform.optcat.checked;
  var showrules = theform.showrules.checked;

  // Read the production rules, one per line
  var linez = theform.rules.value.split("\n");
  rulez = new Array();
  ruleCats = new Array();
  morph = new Array();
  var gotopt = false;   // was the last ordering marker an optional one?
  var r, k;
  for (r = 0; r < linez.length; r++) {
    var ru = linez[r];
    if (ru.charAt(0) == 'µ') {
      morph.push(ru);
      continue;
    }
    if (ru == "+" || ru == "++" || ru == "?" || ru == "1" || ru == "1?") {
      gotopt = (ru == "?" || ru == "1?");
      rulez.push(ru);
      continue;
    }

    // To simplify further parsing: S→AB/E_ becomes S=AB=E_
    ru = ru.replace("→", "=");
    ru = ru.replace("/", "=");
    var cat = rule2cat(ru);
    var env = rule2env(ru);
    var env1 = "";
    var env2 = "";
    var env3 = "";
    if (env != "") {
      var parts = env.split("_");
      if (parts.length > 1) {
        // X_Y: fold the context into both sides of the rule
        env1 = parts[0];
        env2 = parts[1];
      }
      else
        env3 = "=" + env;   // no _: keep as a third part for canapply()
    }
    if (cat != "") {
      // One expanded rule per |-separated alternative
      var subs = rule2sub(ru).split("|");
      for (k = 0; k < subs.length; k++) {
        rulez.push( env1 + cat + env2 + "=" + env1 + subs[k] + env2 + env3 );
        ruleCats.push(cat);
      }
    }
  }
  // If last separator was ? then add + to avoid special handling of dangling ? rules
  if (gotopt)
    rulez.push("+");

  // Output
  var s = "";
  if (rulez.length == 0)
    s = "No rules found";
  else {
    var found = new Array();

    // Keep track of once-only rules
    //   0 = can apply multiple times
    //   1 = can apply once, hasn't applied yet
    //   2 = can apply once, already applied
    var applied = new Array();
    var once = false;
    for (r = 0; r < rulez.length; r++) {
      var rule = rulez[r];
      if (rule == "1" || rule == "1?")
        once = true;
      else if (rule == "+" || rule == "++" || rule == "?")
        once = false;
      applied.push(once ? 1 : 0);

      // Also, if asked, display the rules
      if (showrules)
        s += "Rule " + escapeHtml(rule) + "<br>";
    }

    // Try to create sentences, up to max times
    var max = delib ? 1 : 50;
    var optional = false;
    var keepon = false;
    var lastkeepon = false;
    for (var i = 0; i < max; i++) {
      var t = "S";
      var n = 0;   // emergency brake

      // Reset applied array
      for (r = 0; r < rulez.length; r++) {
        if (applied[r] > 0)
          applied[r] = 1;
      }

      // While rules can apply, apply them
      while (hasNonterms(t)) {
        var alts = new Array();

        // Every scan starts at the top of the rule list, before any
        // marker, so it must not inherit the group state left behind by
        // the marker at which the previous scan stopped.
        optional = false;
        keepon = false;

        // Need to see which rules possibly apply
        for (r = 0; r < rulez.length; r++) {
          if (rulez[r] == "?" || rulez[r] == "1?") {
            // Beginning of an optional set of rules.
            keepon = false;
            if (alts.length > 0)
              break;
            optional = true;
          }
          else if (rulez[r] == "+" || rulez[r] == "++" || rulez[r] == "1") {
            keepon = rulez[r] == "++";
            // Beginning of a normal set of rules.
            if (alts.length > 0) {
              // If the group just scanned was optional, skip it about 1/3 of the time
              if (optional && Math.random() > 0.66) {
                if (delib)
                  s += "Not applying optional rule " + escapeHtml(alts[0]) + "<br/>";
                alts = new Array();   // erase alts
              }
              else
                break;
            }
            optional = false;
          }
          else {
            // Here's a rule. Add to stack if it can be applied.
            if (applied[r] < 2) {
              if (canapply(t, rulez[r])) {
                alts.push(rulez[r]);
                lastkeepon = keepon;
                if (applied[r] > 0)   // Handle apply-once rules
                  applied[r] = 2;
              }
            }
          }
        }
        if (alts.length == 0)
          break;

        // Choose and apply one of the possible rules
        var j = 0;
        if (alts.length > 1)
          j = randomInt(alts.length);
        var newt = apply(t, alts[j]);
        if (lastkeepon) {
          // Rules can be marked as applying as long as they can
          for (k = 0; k < 10 && newt != t; k++) {
            if (delib) s += "Reapplying ";
            t = newt;
            newt = apply(t, alts[j]);
          }
        }
        t = newt;

        // Show deliberations
        if (delib) {
          s += "Can apply:";
          if (lastkeepon)
            s += " (multiply) ";
          else
            s += "(once) ";
          // Own counter: reusing i here would clobber the sentence loop
          for (var a = 0; a < alts.length; a++) {
            if (a == j) s += "<b>";
            s += escapeHtml(alts[a]) + " ";
            if (a == j) s += "</b>";
          }
          s += "<br/> t ← " + escapeHtml(t) + "<br/>";
        }

        if (++n == 500) {
          s += "Aborting, too long - currently ";
          break;
        }
      }

      // Morphological rules. First one is key to affixes
      if (morph.length > 1) {
        morphlist = morph[0].split(" ");
        if (delib) {
          s += "Applying morphological rules: ";
          if (morphlist.length > 1)
            for (var z = 1; z < morphlist.length; z++)
              s += escapeHtml(morphlist[z]) + " ";
          s += "<br/>";
        }
        if (morphlist.length > 1) {
          for (r = 1; r < morph.length; r++)
            t = DoMorph(t, morph[r]);
        }
      }

      // No more nonterminals; output string (each distinct result once)
      if (!isFound(found, t)) {
        s += escapeHtml(t) + "<br/>";
        found.push(t);
      }
    }
  }

  // Set the output field
  document.getElementById("mytext").innerHTML = s;
}

// Make user-supplied text safe to place in innerHTML, so that grammar
// symbols such as < or & are shown literally instead of parsed as markup.
function escapeHtml(text)
{
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

// Open the documentation page
function helpme()
{
  window.open("ggghelp.html");
}

// Open the page about the French data
function frenchme()
{
  window.open("gggfrench.html");
}

// Load a dataset (array of rule lines) into the rules box and select
// word mode with optional symbols enabled.
function string2rules(data)
{
  // The global theform is otherwise only set by process(); fetch the form
  // here too so the dataset buttons work before the first Apply.
  theform = document.theform;

  var ta = document.getElementById("rules");
  ta.value = data.join("\n");
  theform.nts[1].checked = true;
  theform.optcat.checked = true;
}

function quickNP()
{
  var data = [
    "S=Det Num Adjs Npl",
    "Npl=dogs|cats",
    "Det=ø|the|these|those",
    "Num=two|three|four",
    "Adjs=Adj|Adj Adj",
    "Adj=big|friendly|small|nasty"];
  string2rules(data);
}

function ssVerbs()
{
  var data = [
    "S=NP Verb NP",
    "NP=NPS|NPP",
    "Verb=Aux V",
    "Aux=Tense ( Modal ) ( have en ) ( be ing ) ( be en )",
    "Modal=will|can|may|shall|must",
    "Tense=past",
    "Tense=VPL/NPS _",
    "Tense=ø/NPP _",
    "+",
    "en V NPS=en V by NPS",
    "en V NPP=en V by NPP",
    "+",
    "V=hit|take|eat|read",
    "+",
    "NPS=the man|the woman|the book|the dog",
    "NPP=the men|the women|the books|the dogs",
    "+",
    "µ µ VPL past en ing ø",
    "µ have has had had having",
    "µ take takes took taken taking",
    "µ hit hits hit hit hitting",
    "µ be is were been being are",
    "µ read reads read read reading",
    "µ eat eats ate eaten eating",
    "µ may may might - -",
    "µ can can could - - ",
    "µ will will would - - ",
    "µ shall shall should - -",
    "µ must must must - -"
  ];
  string2rules(data);
}

function ssAll()
{
  var data = [
    "S=NP Verb ( Prt ) NP",
    "Verb=Aux V",
    "+",
    "Aux=Tense ( Modal ) ( have en ) ( be ing )",
    "1?",
    "Tense *V=Tense *V not",
    "1?",
    "NP Tense *V=Tense *V NP",
    "+",
    "Tense not=Tense do not",
    "Tense NP=Tense do NP",
    "+",
    "NP=NPS|NPP",
    "NP=NPS and NPS",
    "+",
    "Tense=past",
    "Tense=VPL/NPS _",
    "Tense=ø/NPP _",
    "+",
    "V by Prt=V Prt by",
    "+",
    "V Prt=take in|bring in",
    "+",
    "Modal=will|can|may|shall|must",
    "V=hit|take|eat|read|bring",
    "+",
    "NPS=the man|the woman|the book|the dog|the cat",
    "NPP=the men|the women|the books|the dogs|the cats",
    "+",
    "µ µ VPL past en ing ø",
    "µ have has had had having",
    "µ take takes took taken taking",
    "µ hit hits hit hit hitting",
    "µ bring brings brought brought bringing",
    "µ be is were been being are",
    "µ read reads read read reading",
    "µ eat eats ate eaten eating",
    "µ may may might",
    "µ can can could",
    "µ will will would",
    "µ shall shall should",
    "µ must must must",
    "µ do does did done doing"
  ];
  string2rules(data);
}

function french()
{
  var data = [
    "S=NP Fin VP",
    "1",
    "Fin VP=Fin VP Fin",
    "+",
    "VP=( Prons ) ( Neg ) ( Parfait ) V",
    "+",
    "Neg V *=ne V * pas",
    "Neg Parfait V *=ne avoir * pas é V",
    "+",
    "Parfait V *=avoir * é V",
    "+",
    "avoir Fin=Fin avoir",
    "V Fin=Fin V",
    "++",
    "Fin=1s|2s|3s|1p|2p|3p",
    "+",
    "Prons ne=ne Prons",
    "+",
    "Prons=( Acc ) ( Dat )",
    "+",
    "V=donner|dire/Dat",
    "V=prendre",
    "+",
    "NP 1s=je",
    "NP 2s=tu",
    "NP 3s=il",
    "NP 1p=nous",
    "NP 2p=vous",
    "NP 3p=ils",
    "Acc=me|te|le|la|nous|vous",
    "Dat=me|te|lui|nous|vous",
    "+",
    "µ µ 1s 2s 3s 1p 2p 3p é",
    "µ avoir ai as a avons avez ont eu",
    "µ donner donne donnes donne donnons donnez donnent donné",
    "µ dire dis dis dit disons dites disent dit",
    "µ prendre prends prends prend prenons prenez prennent pris"
  ];
  string2rules(data);
}

function apb()
{
  var data = [
    "S=A B",
    "A=A a|a",
    "B=B b|b",
    "B=p/A _"
  ];
  string2rules(data);
  // This dataset is read in single-letter mode
  theform.nts[0].checked = true;
  theform.nts[1].checked = false;
}

function xpress()
{
  var data = [
    "S=A",
    "A=L|A O A|B",
    "B=D|(A)|-D|L[A]",
    "O=-|+|÷|×",
    "A=L",
    "L=E|EE|EEE|ED",
    "E=a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z",
    "D=DD|0|1|2|3|4|5|6|7|8|9"
  ];
  string2rules(data);
  // Single-letter mode, and parentheses are literal symbols here
  theform.nts[0].checked = true;
  theform.nts[1].checked = false;
  theform.optcat.checked = false;
}

// Our easiest rules:
//S=AB|ASB
//A=a
//B=b
</script>

<table width="100%">
<tr><td bgcolor="#EEC25A">
<h2><br>
<a href="kit.html"><img src="kit-gears.gif" border="0" align="absmiddle" height="53" width="60" alt="Kit"></a>
ggg</h2></td></tr>
</table>

<i>This is a Javascript program which allows you to define the production rules of a generative grammar and discover their output. Press Apply to test, and Help for documentation.</i>
<p><i>Also see: <a href="markov.html">Markov generator</a>; <a href="gtg.html">Generative Tree Gadget</a>; <a href="mg.html">Minimalism gadget</a>.</i>
<p><i>—Mark Rosenfelder, 2018</i>
<hr>

<table width="100%">
<tr>
<td>
<label for="rules">Production rules:</label>
<br><textarea id="rules" name="rules" rows="15" cols="60">
S=AB
A=Aa|a
B=Bb|b
+
b=p/a_
1
p*=p*c</textarea>
</td>
<td width="30px"> </td>
<td>
Nonterminal symbols (all caps) are:
<br><label><input type="radio" name="nts" value="letter" checked>Single letters</label>
<br><label><input type="radio" name="nts" value="word">Words (space-separated)</label>
<p><label><input type="checkbox" name="showrules">Show parsed rules</label>
<p><label><input type="checkbox" name="debug">Show debugging output</label>
<p><label><input type="checkbox" name="optcat" checked>Allow optional symbols with ()</label>
</td>
<td width="30px"> </td>
<td>
Some datasets:
<br><input type="button" value="aaapbbb" onclick="apb();">
<br><input type="button" value="Math expressions" onclick="xpress();">
<br><input type="button" value="Simple NPs" onclick="quickNP();">
<br><input type="button" value="SS Verb Complex" onclick="ssVerbs();">
<br><input type="button" value="SS Sentences" onclick="ssAll();">
<br><input type="button" value="French verbs" onclick="french();">
</td>
<td width="30px"> </td>
<td>
<p><input type="button" value="Apply" onclick="process();">
<p><input type="button" value="Help me!" onclick="helpme();">
<p><input type="button" value="French data" onclick="frenchme();">
</td>
</tr></table>

<hr>
<h3>Output</h3>
<br><div id="mytext"> </div>
</form>
<hr>
<center><a href="default.html"><img src="homeg.gif" border="0" alt="Home"></a></center>
</body>
</html>