CINXE.COM

gen - language text generator

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>gen - language text generator</title>
<style>
  body {background:#FFFFFF; color:#000000;}
  h2 {color:#A60000;}
  h3 {color:#C08700;}
  h4 {color:#C08700;}
  h5 {color:#C08700;}
  h6 {color:#C08700;}
  tt {color:#A60000; font-weight:bold; font-family:"Gentium";}
  table.layout {width:100%;}
  td.banner {background:#EEC25A;}
  td.banner img {vertical-align:middle;}
  a img {border:0;}
  fieldset {border:0; margin:1em 0; padding:0;}
  legend {padding:0;}
  .intro {font-style:italic;}
  .ipa {font-family:"Gentium";}
  .home {text-align:center;}
</style>
<script>
// Script (C) 2012 by Mark Rosenfelder.
// You can modify the code for non-commercial use;
// attribution would be nice.
// If you want to make money off it, please contact me.

// ---------------------------------------------------------------------
// Shared state. Everything below is global because the generator
// functions communicate through these variables rather than arguments.
// ---------------------------------------------------------------------
var NLEX = 150;        // number of words in the table wordlist
var NSENT = 30;        // number of sentences in the text output
var s;                 // the word currently being built
var ss;                // the accumulated HTML output
var punc = ".?!";      // sentence-final punctuation, most likely first
var cat;               // category lines, e.g. "V=aeiou"
var ncat;              // number of usable entries in cat
var syl;               // syllable patterns, e.g. "CVC"
var nsyl;              // number of entries in syl
var catindex = "";     // the category letters, in the same order as cat
var badcats;           // true when a category line is malformed
var monosyl = 0.0;     // 0 = always monosyllabic; otherwise the chance
                       // that a word stays at one syllable
var dropoff = 50;      // PowerLaw percentage for picking within a
                       // category; 0 means pick uniformly
var rew;               // rewrite rule lines, e.g. "ki|či"
var nrew;              // number of entries in rew
var showsyl = 0;       // mark syllable boundaries in the output
var slowsyl = 0;       // use the gentler syllable-pattern dropoff
var syldrop = 50;      // PowerLaw percentage for picking a pattern
var defN = 0;          // which built-in default set was loaded last

// Return the index of the first occurrence of the character ch in s,
// or -1 if it does not occur.
function find(s, ch) {
  return s.indexOf(ch);
}

// Split a text box value into lines, removing a trailing carriage
// return (char code 13) from each line if one is present.
function splitLines(text) {
  var lines = text.split("\n");
  for (var i = 0; i < lines.length; i++) {
    var line = lines[i];
    if (line.charCodeAt(line.length - 1) == 13) {
      lines[i] = line.substring(0, line.length - 1);
    }
  }
  return lines;
}

// Read the categories and syllable types from the form into
// cat/ncat/catindex/badcats and syl/nsyl.
function readStuff() {
  var theform = document.theform;

  // Parse the category list
  cat = splitLines(theform.cats.value);
  ncat = cat.length;
  badcats = false;

  // Make sure cats have structure like V=aeiou
  catindex = "";
  var w;
  for (w = 0; w < ncat; w++) {
    var thiscat = cat[w];
    if (thiscat.length == 0 && w == ncat - 1) {
      // A final empty cat can be ignored
      ncat--;
    } else if (thiscat.length < 3 || thiscat.charAt(1) != "=") {
      // The expansion is read from index 2 onward, so the "=" has to
      // sit at index 1; merely containing one somewhere is not enough.
      badcats = true;
    } else {
      catindex += thiscat.charAt(0);
    }
  }

  // Parse the syllable list. Blank lines are dropped: an empty pattern
  // would generate empty words, and keeping them meant nsyl could never
  // reach 0, so the "no syllables" error in process() could not fire.
  var lines = splitLines(theform.syls.value);
  syl = [];
  for (w = 0; w < lines.length; w++) {
    if (lines[w].length > 0) {
      syl.push(lines[w]);
    }
  }
  nsyl = syl.length;
}

// A random percentage: an integer from 0 to 100 inclusive.
function randpct() {
  return Math.floor(Math.random() * 101);
}

// Apply rewrite rules to the current word (the global s).
function rewriterules() {
  s = rewriterulesStr(s);
}

// Apply rewrite rules on just one string and return the result.
// Each rule is "pattern|replacement"; the pattern is used as a regular
// expression and every match is replaced. Lines of two characters or
// fewer, or without a "|", are ignored.
function rewriterulesStr(s) {
  for (var w = 0; w < nrew; w++) {
    var rule = rew[w];
    if (rule.length > 2 && find(rule, "|") != -1) {
      var parse = rule.split("|");
      // for case insensitivity change to "gi"
      var regex = new RegExp(parse[0], "g");
      s = s.replace(regex, parse[1]);
    }
  }
  return s;
}

// Cheap iterative implementation of a power law:
// our chances of staying at a bin are pct %.
// Returns an integer from 0 to max - 1, wrapping around to 0 if the
// walk runs past the last bin.
function PowerLaw(max, pct) {
  // With no bins the modulo below yields NaN, and with pct <= 0 the
  // loop would never end.
  if (max <= 0 || pct <= 0) {
    return 0;
  }
  var r = 0;
  while (randpct() >= pct) {
    r = (r + 1) % max;
  }
  return r;
}

// Similar, but there's a peak at mode: half the time we walk upward
// from mode, half the time downward.
function PeakedPowerLaw(max, mode, pct) {
  if (Math.random() > 0.5) {
    // going upward from mode
    return mode + PowerLaw(max - mode, pct);
  }
  // going downward from mode
  return mode - PowerLaw(mode + 1, pct);
}

// Output a single syllable - this is the guts of the program.
// Appends the syllable to the global s.
function Syllable() {
  // Choose the pattern
  var pattern = syl[PowerLaw(nsyl, syldrop)];

  // For each letter in the pattern, find the category
  for (var c = 0; c < pattern.length; c++) {
    var theCat = pattern.charAt(c);

    // Go find it in the categories list
    var ix = find(catindex, theCat);
    if (ix == -1) {
      // Not found: output the letter directly
      s += theCat;
    } else {
      // Choose from this category
      var expansion = cat[ix].substring(2);
      var r2;
      if (dropoff == 0) {
        r2 = Math.floor(Math.random() * expansion.length);
      } else {
        r2 = PowerLaw(expansion.length, dropoff);
      }
      s += expansion.charAt(r2);
    }
  }
}

// Output a single word: build it in s, then append it to ss.
// If capitalize is true the first character is upper-cased.
function Word(capitalize) {
  s = "";

  // One syllable, unless monosyl allows more; then with probability
  // 1 - monosyl the word gets between one and four extra syllables.
  var nw = 1;
  if (monosyl > 0.0 && Math.random() > monosyl) {
    nw += 1 + PowerLaw(4, 50);
  }

  for (var w = 0; w < nw; w++) {
    Syllable();
    if (showsyl && w < nw - 1) {
      s += "˙";
    }
  }

  // Rules run after any syllable markers have been inserted.
  rewriterules();

  if (capitalize) {
    s = s.charAt(0).toUpperCase() + s.substring(1);
  }
  ss += s;
}

// Output a pseudo-text: NSENT sentences of random length, each
// starting with a capital and ending with a mark taken from punc.
function CreateText() {
  for (var sent = 0; sent < NSENT; sent++) {
    var nWord = 1 + PeakedPowerLaw(15, 5, 50);
    for (var w = 0; w < nWord; w++) {
      Word(w == 0);
      if (w == nWord - 1) {
        ss += punc.charAt(PowerLaw(punc.length, 75));
      }
      ss += " ";
    }
  }
}

// Create a list of NLEX words, laid out ten to a table row.
function CreateLex() {
  ss += "<table>";
  for (var w = 0; w < NLEX; w++) {
    if (w % 10 == 0) {
      ss += "<tr>";
    }
    ss += "<td>";
    Word(false);
    ss += "</td>";
    if (w % 10 == 9) {
      ss += "</tr>";
    }
  }
  // Close a partial last row if NLEX is not a multiple of ten.
  if (NLEX % 10 != 0) {
    ss += "</tr>";
  }
  ss += "</table>";
}

// Create a list of NLEX * 5 words, one per line.
function CreateLongLex() {
  for (var w = 0; w < NLEX * 5; w++) {
    Word(false);
    ss += "<br/>";
  }
}

// Collects the output of genall().
var arr = [];

// Generate all the syllables following a particular pattern, plus an initial.
// 1. Look at the first item in pattern, e.g. V
// 2. For each member m of that class (e.g. aeiou)…
//    a. If it ends the pattern, just generate the word initial + m
//    b. If not, call genall recursively with m added to the initial,
//       and a pattern consisting of the rest of the string.
// An item that is not a category letter stands for itself.
// Results are pushed onto arr after the rewrite rules are applied.
function genall(initial, pattern) {
  if (pattern.length == 0) {
    return;
  }

  var theCat = pattern.charAt(0);
  var rest = pattern.substring(1);

  // Find category; a non-category is treated as a one-member class.
  var ix = find(catindex, theCat);
  var members = (ix == -1) ? theCat : cat[ix].substring(2);

  for (var i = 0; i < members.length; i++) {
    var m = members.charAt(i);
    if (rest.length == 0) {
      arr.push(rewriterulesStr(initial + m));
    } else {
      genall(initial + m, rest);
    }
  }
}

// Replace the output with every distinct syllable that the syllable
// types can produce, sorted, one per line.
function CreateAll() {
  arr = [];
  for (var w = 0; w < nsyl; w++) {
    genall("", syl[w]);
  }

  // Sort
  arr.sort();

  // Output, skipping adjacent duplicates
  var out = "";
  var lastel = "";
  for (var i = 0; i < arr.length; i++) {
    var t = arr[i];
    if (i == 0 || t != lastel) {
      out += t + "<br>";
      lastel = t;
    }
  }
  ss = out;
  arr = [];
}

// User hit the action button. Make things happen!
function process() {
  // Read parameters
  var theform = document.theform;
  var isLong = theform.outtype[2].checked;
  var isText = theform.outtype[0].checked;
  var doAll = theform.outtype[3].checked;
  showsyl = theform.showsyl.checked;
  slowsyl = theform.slowsyl.checked;

  monosyl = 0.0;
  if (theform.monosyl[1].checked) monosyl = 0.85;
  else if (theform.monosyl[2].checked) monosyl = 0.50;
  else if (theform.monosyl[3].checked) monosyl = 0.20;
  else if (theform.monosyl[4].checked) monosyl = 0.07;

  dropoff = 30;
  if (theform.dropoff[0].checked) dropoff = 45;
  else if (theform.dropoff[2].checked) dropoff = 15;
  else if (theform.dropoff[3].checked) dropoff = 8;
  else if (theform.dropoff[4].checked) dropoff = 0;

  // Stuff we can do once
  ss = "";
  readStuff();
  rew = splitLines(theform.rewrite.value);
  nrew = rew.length;

  // Syllable dropoff
  if (slowsyl) {
    // This has to be a single else-if chain: without the "else" before
    // the nsyl < 9 test, the two special cases were always overwritten.
    if (nsyl == 2) syldrop = 50;
    else if (nsyl == 3) syldrop = 40;
    else if (nsyl < 9) syldrop = 46 - nsyl * 4;
    else syldrop = 11;
  } else {
    if (nsyl < 9) syldrop = 60 - nsyl * 5;
    else syldrop = 12;
  }

  // Error checking
  if (ncat <= 0 || nsyl <= 0) {
    ss = "You must have both categories and syllables to generate text.";
  } else if (badcats) {
    ss = "Categories must be of the form V=aeiou<br>" +
         "That is, a single letter, an equal sign, then a list of possible expansions.";
  } else {
    // Actually generate text
    if (isText) {
      CreateText();
    } else if (isLong) {
      CreateLongLex();
    } else if (doAll) {
      CreateAll();
    } else {
      CreateLex();
    }
  }

  // Set the output field.
  // NOTE(review): the generated words are inserted as HTML without
  // escaping, so "<" or "&" typed into the boxes is interpreted as
  // markup. The text comes only from this page's own text boxes; it is
  // left as is in case rewrite rules are used to emit markup - confirm
  // before changing.
  document.getElementById("mytext").innerHTML = ss;
}

// Clear the output area.
function erase() {
  document.getElementById("mytext").innerHTML = "";
}

// Open the help page in a new window.
function helpme() {
  window.open("genhelp.html");
}

// Parse the Cat field into the three input fields: lines containing
// "|" are rewrite rules, lines containing "=" are categories, and
// anything else is a syllable type.
function parsecat() {
  var theform = document.theform;
  var lines = splitLines(theform.cats.value);

  var osyl = "";
  var orew = "";
  var ocat = "";
  for (var w = 0; w < lines.length; w++) {
    var t = lines[w];
    // Blank lines are not syllable types. Counting them as such made
    // the guard below useless and added an empty pattern on every
    // intocat()/parsecat() round trip.
    if (t.length == 0) continue;
    if (find(t, "|") != -1) orew += t + "\n";
    else if (find(t, "=") != -1) ocat += t + "\n";
    else osyl += t + "\n";
  }

  if (osyl == "" && theform.syls.value != "") {
    alert("No syllable types were found in the categories box, and you have content in the syllable types box. You probably don't want to do a Parse then.");
    return;
  }
  if (orew == "" && theform.rewrite.value != "") {
    alert("No rewrite rules were found in the categories box, and you have content in the rewrite rules box. You probably don't want to do a Parse then.");
    return;
  }

  theform.cats.value = ocat;
  theform.rewrite.value = orew;
  theform.syls.value = osyl;
}

// Copy all three input fields back into the SC area (the categories
// box): categories, then rewrite rules, then syllable types.
function intocat() {
  var theform = document.theform;
  theform.cats.value = theform.cats.value + "\n" +
                       theform.rewrite.value + "\n" +
                       theform.syls.value + "\n";
}

// Display the IPA: a few symbols, then U+0250..U+02AF, then
// U+00C0..U+0237, in the output area.
function showipa() {
  var i;
  s = "<span class='ipa'>&#x00b2; &#x2023; &#x2026; ";
  for (i = 0x0250; i <= 0x02af; i++) {
    s += String.fromCharCode(i) + " ";
  }
  for (i = 0x00c0; i <= 0x0237; i++) {
    s += String.fromCharCode(i) + " ";
  }
  s += "</span>";
  document.getElementById("mytext").innerHTML = s;
}

// Defaults: each call loads the next of five built-in sets into the
// three text boxes, wrapping around after the fifth.
function defaultme() {
  // Look the form up explicitly, as the other functions do, instead of
  // relying on the form's name being exposed as a global.
  var theform = document.theform;

  if (++defN == 6) defN = 1;

  switch (defN) {
    case 1: // Large inventory
      theform.cats.value = "C=ptknslrmbdgfvwyhšzñxčžŋ\nV=aiuoeɛɔâôüö\nR=rly";
      theform.syls.value = "CV\nV\nCVC\nCRV";
      theform.rewrite.value = "â|ai\nô|au";
      break;
    case 2: // Latinate
      theform.cats.value = "C=tkpnslrmfbdghvyh\nV=aiueo\nU=aiuôê\nR=rl" +
                           "\nM=nsrmltc\nK=ptkbdg";
      theform.syls.value = "CV\nCUM\nV\nUM\nKRV\nKRUM";
      theform.rewrite.value = "ka|ca\nko|co\nku|cu\nkr|cr";
      break;
    case 3: // Simple
      theform.cats.value = "C=tpknlrsmʎbdgñfh\nV=aieuoāīūēō\nN=nŋ";
      theform.syls.value = "CV\nV\nCVN";
      theform.rewrite.value = "aa|ā\nii|ī\nuu|ū\nee|ē\noo|ō\nnb|mb\nnp|mp";
      break;
    case 4: // Chinese
      theform.cats.value = "C=ptknlsmšywčhfŋ\nV=auieo\nR=rly" +
                           "\nN=nnŋmktp\nW=io\nQ=ptkč";
      theform.syls.value = "CV\nQʰV\nCVW\nCVN\nVN\nV\nQʰVN";
      theform.rewrite.value = "uu|wo\noo|ou\nii|iu\naa|ia\nee|ie";
      break;
    case 5: // Original default
      theform.cats.value = "C=ptkbdg\nR=rl\nV=ieaou";
      theform.syls.value = "CV\nV\nCRV";
      theform.rewrite.value = "ki|či";
      break;
  }
}
</script>
</head>
<body>

<table class="layout" role="presentation">
  <tr>
    <td class="banner">
      <h2><br>&nbsp;&nbsp;<a href="kit.html"><img src="kit-gears.gif" alt="Kit" height="53" width="60"></a>&nbsp;Gen</h2>
    </td>
  </tr>
</table>

<div class="intro">
  This is a Javascript vocabulary generator. Enter your syllable types in
  the box at the right, using any categories you want (e.g. R for liquids).
  The <a href="phono.html">Phonology Builder</a> can help with categories.
  Define the phonemes that make up those categories in the box at the left.
  Hit Generate to create a random wordlist. Hit the Help button for more.
  <p>—Mark Rosenfelder, 2012</p>
</div>

<hr>

<!-- The script looks this form up as document.theform, and the radio
     groups by position, so keep the form name, the control names and
     the order of the radio buttons. -->
<form name="theform">
  <table class="layout" role="presentation">
    <tr>
      <td colspan="2">
        <label for="cats">Categories:</label>
        <br><textarea id="cats" name="cats" rows="10" cols="50">
C=ptkbdg
R=rl
V=ieaou</textarea>
      </td>
      <td>
        <label for="rewrite">Rewrite rules:</label>
        <br><textarea id="rewrite" name="rewrite" rows="10" cols="20">
ki|či</textarea>
      </td>
      <td rowspan="3">
        <label for="syls">Syllable types:</label>
        <br><textarea id="syls" name="syls" rows="20" cols="20">
CV
V
CRV</textarea>
      </td>
    </tr>
    <tr>
      <td>
        <fieldset>
          <legend>Output type:</legend>
          <label><input type="radio" name="outtype" value="text" checked>Text output</label>
          <br><label><input type="radio" name="outtype" value="dict">Wordlist (as table)</label>
          <br><label><input type="radio" name="outtype" value="longdict">Big-ass wordlist</label>
          <br><label><input type="radio" name="outtype" value="genall">All possible syllables</label>
        </fieldset>
        <p>
          <label><input type="checkbox" name="showsyl">Show syllables</label>
          <br><label><input type="checkbox" name="slowsyl">Slow syllable dropoff</label>
        </p>
      </td>
      <td>
        <fieldset>
          <legend>Dropoff:</legend>
          <label><input type="radio" name="dropoff" value="F">Fast</label>
          <br><label><input type="radio" name="dropoff" value="M" checked>Medium</label>
          <br><label><input type="radio" name="dropoff" value="S">Slow</label>
          <br><label><input type="radio" name="dropoff" value="L">Molasses</label>
          <br><label><input type="radio" name="dropoff" value="E">Equiprobable</label>
        </fieldset>
      </td>
      <td>
        <fieldset>
          <legend>Monosyllables:</legend>
          <label><input type="radio" name="monosyl" value="A">Always</label>
          <br><label><input type="radio" name="monosyl" value="M">Mostly</label>
          <br><label><input type="radio" name="monosyl" value="F">Frequent</label>
          <br><label><input type="radio" name="monosyl" value="LF" checked>Less frequent</label>
          <br><label><input type="radio" name="monosyl" value="R">Rare</label>
        </fieldset>
      </td>
    </tr>
    <tr>
      <td colspan="3">
        <p>
          <input type="button" value="Generate" onclick="process();">
          &nbsp;<input type="button" value="Clear" onclick="erase();">
          &nbsp;<input type="button" value="Help me!" onclick="helpme();">
          &nbsp;<input type="button" value="Parse cats" onclick="parsecat();">
          &nbsp;<input type="button" value="Back to cats" onclick="intocat();">
          &nbsp;<input type="button" value="IPA" onclick="showipa();">
          &nbsp;<input type="button" value="Defaults" onclick="defaultme();">
        </p>
      </td>
    </tr>
  </table>
</form>

<hr>
<h3>Output</h3>
<br><div id="mytext" aria-live="polite"> </div>

<hr>
<div class="home"><a href="default.html"><img src="homeg.gif" alt="Home"></a></div>

</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10