CINXE.COM

ja:devel:parser [DokuWiki]

<!DOCTYPE html> <html lang="ja" dir="ltr" class="no-js"> <head> <meta charset="utf-8" /> <title>ja:devel:parser [DokuWiki]</title> <meta name="generator" content="DokuWiki"/> <meta name="theme-color" content="#008800"/> <meta name="robots" content="index,follow"/> <meta name="keywords" content="ja,devel,parser"/> <link rel="search" type="application/opensearchdescription+xml" href="/lib/exe/opensearch.php" title="DokuWiki"/> <link rel="start" href="/"/> <link rel="contents" href="/ja:devel:parser?do=index" title="サイトマップ"/> <link rel="manifest" href="/lib/exe/manifest.php"/> <link rel="alternate" type="application/rss+xml" title="最近の変更" href="/feed.php"/> <link rel="alternate" type="application/rss+xml" title="現在の名前空間" href="/feed.php?mode=list&amp;ns=ja:devel"/> <link rel="edit" title="文書の編集" href="/ja:devel:parser?do=edit"/> <link rel="alternate" type="text/html" title="プレーンHTML" href="/_export/xhtml/ja:devel:parser"/> <link rel="alternate" type="text/plain" title="Wikiマークアップ" href="/_export/raw/ja:devel:parser"/> <link rel="canonical" href="https://www.dokuwiki.org/ja:devel:parser"/> <link rel="stylesheet" href="/lib/exe/css.php?t=dokuwiki&amp;tseed=f1005bad3d81fc9c803c7f93d32a390e"/> <link rel="alternate" hreflang="en" href="https://www.dokuwiki.org/devel:parser"/> <link rel="alternate" hreflang="cs" href="https://www.dokuwiki.org/cs:devel:parser"/> <link rel="alternate" hreflang="es" href="https://www.dokuwiki.org/es:devel:parser"/> <link rel="alternate" hreflang="ko" href="https://www.dokuwiki.org/ko:devel:parser"/> <link rel="alternate" hreflang="ru" href="https://www.dokuwiki.org/ru:devel:parser"/> <link rel="alternate" hreflang="x-default" href="https://www.dokuwiki.org/devel:parser"/>
<script >var NS='ja:devel';var JSINFO = {"plugins":{"edittable":{"default columnwidth":""}},"id":"ja:devel:parser","namespace":"ja:devel","ACT":"show","useHeadingNavigation":0,"useHeadingContent":0};(function(H){H.className=H.className.replace(/\bno-js\b/,'js')})(document.documentElement);</script> <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.4/jquery.min.js" defer="defer"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.13.2/jquery-ui.min.js" defer="defer"></script> <script src="/lib/exe/js.php?t=dokuwiki&amp;tseed=f1005bad3d81fc9c803c7f93d32a390e&amp;lang=ja" defer="defer"></script> <meta name="viewport" content="width=device-width,initial-scale=1" /> <link rel="shortcut icon" href="/lib/tpl/dokuwiki/images/favicon.ico" /> <link rel="apple-touch-icon" href="/lib/tpl/dokuwiki/images/apple-touch-icon.png" /> <meta name="verify-v1" content="OVxl3gsCv2MhZqh1cBQyl0JytWXSwXMjyvwc+4w3WtA=" /> <meta name="google-site-verification" content="YhTVK69hW94ZXUtc2zSLPxTkZKbZIn0zK67mz5WQB-E" /> <!-- Global site tag (gtag.js) - Google Analytics --> <script async src="https://www.googletagmanager.com/gtag/js?id=UA-83791-1"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-83791-1', { 'anonymize_ip': true }); </script> </head> <body> <div id="dokuwiki__site"><div id="dokuwiki__top" class="site dokuwiki mode_show tpl_dokuwiki showSidebar hasSidebar"> <!-- ********** HEADER ********** --> <header id="dokuwiki__header"><div class="pad group"> <div class="headings group"> <ul class="a11y skip"> <li><a href="#dokuwiki__content">内容へ移動</a></li> </ul> <h1 class="logo"><a href="/start" accesskey="h" title="Home [h]"><img src="/lib/tpl/dokuwiki/images/logo.png" width="64" height="64" alt="" /><span>DokuWiki</span></a></h1> <p class="claim">It's better when it's simple</p> </div> <div class="tools group"> <!-- USER TOOLS --> <div id="dokuwiki__usertools"> <h3 class="a11y">ユーザ用ツール</h3> <ul> <li class="action login"><a 
href="/ja:devel:parser?do=login&amp;sectok=" title="ログイン" rel="nofollow"><span>ログイン</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M10 17.25V14H3v-4h7V6.75L15.25 12 10 17.25M8 2h9a2 2 0 0 1 2 2v16a2 2 0 0 1-2 2H8a2 2 0 0 1-2-2v-4h2v4h9V4H8v4H6V4a2 2 0 0 1 2-2z"/></svg></a></li> </ul> </div> <!-- SITE TOOLS --> <div id="dokuwiki__sitetools"> <h3 class="a11y">サイト用ツール</h3> <form action="/start" method="get" role="search" class="search doku_form" id="dw__search" accept-charset="utf-8"><input type="hidden" name="do" value="search" /><input type="hidden" name="id" value="ja:devel:parser" /><div class="no"><input name="q" type="text" class="edit" title="[F]" accesskey="f" placeholder="検索" autocomplete="on" id="qsearch__in" value="" /><button value="1" type="submit" title="検索">検索</button><div id="qsearch__out" class="ajax_qsearch JSpopup"></div></div></form> <div class="mobileTools"> <form action="/doku.php" method="get" accept-charset="utf-8"><div class="no"><input type="hidden" name="id" value="ja:devel:parser" /><select name="do" class="edit quickselect" title="ツール"><option value="">ツール</option><optgroup label="ページ用ツール"><option value="edit">文書の編集</option><option value="revisions">以前のリビジョン</option><option value="backlink">バックリンク</option></optgroup><optgroup label="サイト用ツール"><option value="recent">最近の変更</option><option value="media">メディアマネージャー</option><option value="index">サイトマップ</option></optgroup><optgroup label="ユーザ用ツール"><option value="login">ログイン</option></optgroup></select><button type="submit">&gt;</button></div></form> </div> <ul> <li class="action recent"><a href="/ja:devel:parser?do=recent" title="最近の変更 [r]" rel="nofollow" accesskey="r">最近の変更</a></li><li class="action media"><a href="/ja:devel:parser?do=media&amp;ns=ja%3Adevel" title="メディアマネージャー" rel="nofollow">メディアマネージャー</a></li><li class="action index"><a href="/ja:devel:parser?do=index" title="サイトマップ [x]" rel="nofollow" accesskey="x">サイトマップ</a></li> </ul> 
</div> </div> <!-- BREADCRUMBS --> <div class="breadcrumbs"> <div class="trace"><span class="bchead">トレース:</span> <span class="bcsep">•</span> <span class="curid"><bdi><a href="/ja:devel:parser" class="breadcrumbs" title="ja:devel:parser">parser</a></bdi></span></div> </div> <hr class="a11y" /> </div></header><!-- /header --> <div class="wrapper group"> <!-- ********** ASIDE ********** --> <nav id="dokuwiki__aside" aria-label="サイドバー"><div class="pad aside include group"> <h3 class="toggle">サイドバー</h3> <div class="content"><div class="group"> <div class="notify">この翻訳は<a href="/devel:parser" class="wikilink1">元のページ</a>よりも更新日が古く、内容が古い可能性があります。 <a href="/devel:parser?do=diff&amp;rev=1362580355" class="wikilink1">変更点</a>を参照して下さい。</div><div class="plugin_translation is-dropdown"><span class="title">このページの翻訳<sup><a href="/localization" class="wikilink1" title="localization" data-wiki-id="localization">?</a></sup>: </span><ul><li class="a"><a class="wikilink1" title="English" href="/devel:parser">English (en)</a></li><li class="a"><a class="wikilink2" title="|العربية" href="/ar:devel:parser">|العربية (ar)</a></li><li class="a"><a class="wikilink2" title="Català" href="/ca:devel:parser">Català (ca)</a></li><li class="a"><a class="wikilink1" title="Česky" href="/cs:devel:parser">Česky (cs)</a></li><li class="a"><a class="wikilink2" title="Dansk" href="/da:devel:parser">Dansk (da)</a></li><li class="a"><a class="wikilink2" title="Deutsch" href="/de:devel:parser">Deutsch (de)</a></li><li class="a"><a class="wikilink2" title="Ελληνικά" href="/el:devel:parser">Ελληνικά (el)</a></li><li class="a"><a class="wikilink2" title="Esperanto" href="/eo:devel:parser">Esperanto (eo)</a></li><li class="a"><a class="wikilink1" title="Español" href="/es:devel:parser">Español (es)</a></li><li class="a"><a class="wikilink2" title="فارسی" href="/fa:devel:parser">فارسی (fa)</a></li><li class="a"><a class="wikilink2" title="Français" href="/fr:devel:parser">Français (fr)</a></li><li class="a"><a 
class="wikilink2" title="עברית" href="/he:devel:parser">עברית (he)</a></li><li class="a"><a class="wikilink2" title="Magyar" href="/hu:devel:parser">Magyar (hu)</a></li><li class="a"><a class="wikilink2" title="Italiano" href="/it:devel:parser">Italiano (it)</a></li><li class="span"><span class="wikilink1" title="日本語">日本語 (ja)</span></li><li class="a"><a class="wikilink1" title="한국어" href="/ko:devel:parser">한국어 (ko)</a></li><li class="a"><a class="wikilink2" title="Lietuvių Kalba" href="/lt:devel:parser">Lietuvių Kalba (lt)</a></li><li class="a"><a class="wikilink2" title="Nederlands" href="/nl:devel:parser">Nederlands (nl)</a></li><li class="a"><a class="wikilink2" title="Norsk" href="/no:devel:parser">Norsk (no)</a></li><li class="a"><a class="wikilink2" title="Polski" href="/pl:devel:parser">Polski (pl)</a></li><li class="a"><a class="wikilink2" title="Português" href="/pt-br:devel:parser">Português (pt-br)</a></li><li class="a"><a class="wikilink2" title="Română" href="/ro:devel:parser">Română (ro)</a></li><li class="a"><a class="wikilink1" title="Русский" href="/ru:devel:parser">Русский (ru)</a></li><li class="a"><a class="wikilink2" title="Српски Језик" href="/sr:devel:parser">Српски Језик (sr)</a></li><li class="a"><a class="wikilink2" title="Svenska" href="/sv:devel:parser">Svenska (sv)</a></li><li class="a"><a class="wikilink2" title="ไทย" href="/th:devel:parser">ไทย (th)</a></li><li class="a"><a class="wikilink2" title="Українська" href="/uk:devel:parser">Українська (uk)</a></li><li class="a"><a class="wikilink2" title="中文" href="/zh:devel:parser">中文 (zh)</a></li><li class="a"><a class="wikilink2" title="繁體中文" href="/zh-tw:devel:parser">繁體中文 (zh-tw)</a></li></ul></div> <p> <strong>DokuWiki について知る</strong> </p> <ul> <li class="level1"><div class="li"> <a href="/ja:features" class="wikilink1" title="ja:features" data-wiki-id="ja:features">機能</a> &amp; <a href="/ja:blogroll" class="wikilink1" title="ja:blogroll" data-wiki-id="ja:blogroll">紹介</a> </div> </li> 
<li class="level1"><div class="li"> <a href="/ja:install" class="wikilink1" title="ja:install" data-wiki-id="ja:install">インストール</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:manual" class="wikilink1" title="ja:manual" data-wiki-id="ja:manual">利用者マニュアル</a> &amp; <a href="/ja:syntax" class="wikilink1" title="ja:syntax" data-wiki-id="ja:syntax">整形記法</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:changes" class="wikilink1" title="ja:changes" data-wiki-id="ja:changes">リリースノート</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:faq" class="wikilink1" title="ja:faq" data-wiki-id="ja:faq">よくある質問</a></div> </li> </ul> <p> <strong>開発</strong> </p> <ul> <li class="level1"><div class="li"> <a href="/ja:extensions" class="wikilink1" title="ja:extensions" data-wiki-id="ja:extensions">機能拡張</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:development" class="wikilink1" title="ja:development" data-wiki-id="ja:development">開発マニュアル</a></div> </li> </ul> <p> <strong>法人利用</strong> </p> <ul> <li class="level1"><div class="li"> <a href="/ja:faq:support" class="wikilink1" title="ja:faq:support" data-wiki-id="ja:faq:support">商用サポート</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:donate" class="wikilink1" title="ja:donate" data-wiki-id="ja:donate">寄付</a></div> </li> </ul> <p> <strong>我々のコミュニティ</strong> </p> <ul> <li class="level1"><div class="li"> <a href="/ja:teams:getting_involved" class="wikilink1" title="ja:teams:getting_involved" data-wiki-id="ja:teams:getting_involved">参加する</a></div> </li> <li class="level1"><div class="li"> <a href="http://forum.dokuwiki.org" class="urlextern" title="http://forum.dokuwiki.org">ユーザフォーラム</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:newsletter" class="wikilink1" title="ja:newsletter" data-wiki-id="ja:newsletter">ニュースレター</a></div> </li> <li class="level1"><div class="li"> <a href="/ja:mailinglist" class="wikilink1" title="ja:mailinglist" 
data-wiki-id="ja:mailinglist">開発向けメーリングリスト</a></div> </li> </ul> <hr /> <p> <a href="http://www.facebook.com/pages/DokuWiki/52877633616" class="urlextern" title="http://www.facebook.com/pages/DokuWiki/52877633616">Facebook</a>や<a href="http://twitter.com/dokuwiki" class="urlextern" title="http://twitter.com/dokuwiki">Twitter</a>や他の<a href="/ja:social" class="wikilink1" title="ja:social" data-wiki-id="ja:social">ソーシャル・ネットワーク</a>からフォローしてください。 </p> </div></div> </div></nav><!-- /aside --> <!-- ********** CONTENT ********** --> <main id="dokuwiki__content"><div class="pad group"> <div class="pageId"><span>ja:devel:parser</span></div> <div class="page group"> <!-- wikipage start --> <!-- TOC START --> <div id="dw__toc" class="dw__toc"> <h3 class="toggle">目次</h3> <div> <ul class="toc"> <li class="level1"><div class="li"><a href="#dokuwikiパーサについて">Dokuwikiパーサについて</a></div> <ul class="toc"> <li class="level2"><div class="li"><a href="#overview">Overview</a></div> <ul class="toc"> <li class="level3"><div class="li"><a href="#lexer">Lexer</a></div></li> <li class="level3"><div class="li"><a href="#handler">Handler</a></div></li> <li class="level3"><div class="li"><a href="#parser">Parser</a></div></li> <li class="level3"><div class="li"><a href="#instructions_data_format">Instructions Data Format</a></div></li> <li class="level3"><div class="li"><a href="#renderer">Renderer</a></div></li> </ul> </li> <li class="level2"><div class="li"><a href="#examples">Examples</a></div> <ul class="toc"> <li class="level3"><div class="li"><a href="#basic_invokation">Basic Invokation</a></div></li> <li class="level3"><div class="li"><a href="#selecting_text_for_sections">Selecting Text (for sections)</a></div></li> <li class="level3"><div class="li"><a href="#managing_data_file_input_for_patterns">Managing Data File Input for Patterns</a></div></li> <li class="level3"><div class="li"><a href="#testing_links_for_spam">Testing Links for Spam</a></div></li> <li class="level3"><div 
class="li"><a href="#adding_substitution_syntax">Adding Substitution Syntax</a></div></li> <li class="level3"><div class="li"><a href="#adding_formatting_syntax_with_state">Adding Formatting Syntax (with state)</a></div></li> <li class="level3"><div class="li"><a href="#adding_block-level_syntax">Adding Block-Level Syntax</a></div></li> <li class="level3"><div class="li"><a href="#serializing_the_renderer_instructions">Serializing the Renderer Instructions</a></div></li> <li class="level3"><div class="li"><a href="#serializing_the_parser">Serializing the Parser</a></div></li> </ul> </li> <li class="level2"><div class="li"><a href="#testing">Testing</a></div></li> <li class="level2"><div class="li"><a href="#bugs_issues">Bugs / Issues</a></div> <ul class="toc"> <li class="level3"><div class="li"><a href="#order_of_adding_modes_important">Order of adding modes important</a></div></li> <li class="level3"><div class="li"><a href="#change_to_wordblock">Change to Wordblock</a></div></li> <li class="level3"><div class="li"><a href="#weakest_links">Weakest Links</a></div></li> <li class="level3"><div class="li"><a href="#greedy_tags">Greedy Tags</a></div></li> <li class="level3"><div class="li"><a href="#footnote_across_list">Footnote across list</a></div></li> <li class="level3"><div class="li"><a href="#linefeed_grabbing">Linefeed grabbing</a></div></li> <li class="level3"><div class="li"><a href="#lists_tables_quote_issue">Lists / Tables / Quote Issue</a></div></li> <li class="level3"><div class="li"><a href="#footnotes_and_blocks">Footnotes and blocks</a></div></li> <li class="level3"><div class="li"><a href="#headers">Headers</a></div></li> <li class="level3"><div class="li"><a href="#block_list_issue">Block / List Issue</a></div></li> </ul> </li> <li class="level2"><div class="li"><a href="#todo">TODO</a></div> <ul class="toc"> <li class="level3"><div class="li"><a href="#more_state_to_state_closing_instructions">More State to State Closing 
Instructions</a></div></li> <li class="level3"><div class="li"><a href="#table_list_quote_sub_modes">Table / List / Quote sub modes</a></div></li> </ul> </li> <li class="level2"><div class="li"><a href="#discussion">Discussion</a></div> <ul class="toc"> <li class="level3"><div class="li"><a href="#enhance_the_parser_with_tree_structure">Enhance the Parser with Tree Structure</a></div></li> </ul></li> </ul></li> </ul> </div> </div> <!-- TOC END --> <h1 class="sectionedit1" id="dokuwikiパーサについて">Dokuwikiパーサについて</h1> <div class="level1"> <p> このドキュメントではDokuwikiパーサの詳細を解説しており、パーサの挙動に手を加えたり、たとえば生成されたHTMLの修正や異なる出力フォーマットの実装など、出力する文書をコントロールできるようにしたい開発者を対象としています。 </p> </div> <div class="secedit editbutton_section editbutton_1"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Dokuwikiパーサについて] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="dokuwikiパーサについて" /><input type="hidden" name="codeblockOffset" value="0" /><input type="hidden" name="range" value="1-368" /><button type="submit" title="Dokuwikiパーサについて">編集</button></div></form></div> <h2 class="sectionedit2" id="overview">Overview</h2> <div class="level2"> <p> The parser breaks down the process of transforming a raw DokuWiki document to the final output document (normally XHTML) into discrete stages. Each stage is represented by one or more PHP classes. 
</p> <p> Broadly these elements are; </p> <ol> <li class="level1"><div class="li"> Lexer<sup><a href="#fn__1" id="fnt__1" class="fn_top">1)</a></sup>: scans <sup><a href="#fn__2" id="fnt__2" class="fn_top">2)</a></sup> from a DokuWiki document and outputs a sequence of “tokens” <sup><a href="#fn__3" id="fnt__3" class="fn_top">3)</a></sup>, corresponding to the syntax in the document</div> </li> <li class="level1"><div class="li"> Handler<sup><a href="#fn__4" id="fnt__4" class="fn_top">4)</a></sup>: receives the tokens from the Lexer and transforms them into a sequence of “instructions” <sup><a href="#fn__5" id="fnt__5" class="fn_top">5)</a></sup>. It describes how the document should be rendered, from start to finish, without the Renderer needing to keep track of state.</div> </li> <li class="level1"><div class="li"> Parser<sup><a href="#fn__6" id="fnt__6" class="fn_top">6)</a></sup>: “connects up” the Lexer with the Handler, providing the DokuWiki syntax rules as well as the point of access to the system (the Parser::parse() method)</div> </li> <li class="level1"><div class="li"> Renderer<sup><a href="#fn__7" id="fnt__7" class="fn_top">7)</a></sup>: accepts the instructions from the Handler and “draws” the document ready for viewing (e.g. as XHTML)</div> </li> </ol> <p> No mechanism is provided for connecting up the Handler with the Renderer - this needs coding per specific use case. </p> <p> A rough diagram of the relationships between these components; </p> <pre class="code">       +-----------+          +-----------+
       |           |  Input   |  Client   |
       |  Parser   |&lt;---------|   Code    |
       |           |  String  |           |
       +-----.-----+          +-----|-----+
      Modes  |                     /|\
         +   |            Renderer  |
      Input  |          Instructions|
     String \|/                     |
       +-----&#039;-----+          +-----------+
       |           |          |           |
       |   Lexer   |---------&gt;|  Handler  |
       |           |  Tokens  |           |
       +-----.-----+          +-----------+
             |
             |
        +----+---+
        | Modes  |-+
        +--------+ |-+
          +--------+ |
            +--------+</pre> <p> The “Client Code” (code using the Parser) invokes the Parser, giving it the input string. 
It receives, in return, the list of “Renderer Instructions”, built by the Handler. These can then be fed to some object which implements the Renderer. </p> <p> <strong>Note:</strong> A critical point behind this design is the intent to allow the Renderer to be as “dumb” as possible. It should <em>not</em> need to make further interpretation / modification of the instructions it is given but be purely concerned with rendering some kind of output (e.g. XHTML) - in particular the Renderer should not need to keep track of state. By keeping to this principle, aside from making Renderers easy to implement (the focus being purely on what to output), it will also make it possible for Renderers to be interchangeable (e.g. output PDF as alternative to XHTML). At the same time, the instructions output from the Handler are <em>geared</em> for rendering XHTML and may not be entirely suited for all output formats. </p> </div> <div class="secedit editbutton_section editbutton_2"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Overview] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="overview" /><input type="hidden" name="codeblockOffset" value="0" /><input type="hidden" name="range" value="369-4013" /><button type="submit" title="Overview">編集</button></div></form></div> <h3 class="sectionedit3" id="lexer">Lexer</h3> <div class="level3"> <p> Defined in <code>inc/parser/lexer.php</code> </p> <p> In the most general sense, it provides a tool for managing complex regular expressions, where state is important. 
The Lexer comes from <a href="http://www.lastcraft.com/simple_test.php" class="urlextern" title="http://www.lastcraft.com/simple_test.php">Simple Test</a> but contains three modifications (read: hacks); </p> <ul> <li class="level1"><div class="li"> support for lookback and lookahead patterns</div> </li> <li class="level1"><div class="li"> support for changing the pattern modifiers from within the pattern</div> </li> <li class="level1"><div class="li"> notifying the Handler of the starting byte index in the raw text, where a token was matched</div> </li> </ul> <p> In short, Simple Test’s lexer acts as a tool to make regular expressions easy to manage - rather than giant regexes you write many small / simple ones. The lexer takes care of combining them efficiently then gives you a SAX-like callback <abbr title="Application Programming Interface">API</abbr> to allow you to write code to respond to matched “events”. </p> <p> The Lexer as a whole is made of three main classes; </p> <ul> <li class="level1"><div class="li"> <code>Doku_LexerParallelRegex</code>: allows regular expressions to be constructed from multiple, separate patterns, each pattern being associated with an identifying “label” , the class combining them into a single regex using subpatterns. <em>When using the Lexer, you should not need to worry about this class</em>.</div> </li> <li class="level1"><div class="li"> <code>Doku_LexerStateStack</code>: provides a simple state machine so that lexing can be “context aware”. <em>When using the Lexer, you should not need to worry about this class</em>.</div> </li> <li class="level1"><div class="li"> <code>Doku_Lexer</code>: provides the point of access for client code using the Lexer. Manages multiple instances of ParallelRegex, using the StateStack to apply the correct ParallelRegex instance, depending on “context”. 
On encountering “interesting text”, it calls functions on a user provided object (the Handler).</div> </li> </ul> </div> <h4 id="the_need_for_state">The need for state</h4> <div class="level4"> <p> The wiki syntax used in DokuWiki contains markup, “inside” of which only certain syntax rules apply. The most obvious example is the &lt;code/&gt; tag, inside of which no other wiki syntax should be recognized by the Lexer. Other syntax, such as the list or table syntax should allow <em>some</em> markup but not others e.g. you can use links in a list context but not tables. </p> <p> The Lexer provides “state awareness” allowing it to apply the correct syntax rules depending on its current position (the context) in the text it&#039;s scanning. If it sees an opening &lt;code&gt; tag, it should switch to a different state within which no other syntax rules apply (i.e. anything that would normally look like wiki syntax should be treated as “dumb” text) until it finds the close &lt;/code&gt; tag. </p> </div> <h4 id="lexer_modes">Lexer Modes</h4> <div class="level4"> <p> The term <em>mode</em> is a label for a particular lexing state<sup><a href="#fn__8" id="fnt__8" class="fn_top">8)</a></sup>. The code using the Lexer registers one or more regex patterns with a particular named mode. Then, as the Lexer matches those patterns against the text it is scanning, it calls functions on the Handler with the same name as the mode (unless the <code>mapHandler</code> method was used to create an alias - see below). </p> </div> <h4 id="the_lexer_api">The Lexer API</h4> <div class="level4"> <p> A short introduction to the Lexer can be found at <a href="http://www.phppatterns.com/index.php/article/articleview/106/1/2/" class="urlextern" title="http://www.phppatterns.com/index.php/article/articleview/106/1/2/">Simple Test Lexer Notes</a>. This provides more detail. 
</p> <p> The key methods in the Lexer are; </p> </div> <h5 id="constructor">Constructor</h5> <div class="level5"> <p> Accepts an object reference to the Handler, a name of the initial mode that the Lexer should start in and (optionally) a boolean flag as to whether pattern matching should be case sensitive. </p> <p> Example; </p> <pre class="code php"><span class="re0">$Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> MyHandler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span>
<span class="re0">$Lexer</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Lexer<span class="br0">&#40;</span><span class="re0">$Handler</span><span class="sy0">,</span> <span class="st_h">'base'</span><span class="sy0">,</span> <span class="kw4">TRUE</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> Here the initial mode is called &#039;base&#039;. </p> </div> <h5 id="addentrypattern_addexitpattern">addEntryPattern / addExitPattern</h5> <div class="level5"> <p> Used to register a pattern for entering and exiting a particular parsing mode. 
For example; </p> <pre class="code php"><span class="co1">// arg0: regex to match - note no need to add start/end pattern delimiters</span> <span class="co1">// arg1: name of mode where this entry pattern may be used</span> <span class="co1">// arg2: name of mode to enter</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;file&gt;'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// arg0: regex to match</span> <span class="co1">// arg1: name of mode to exit</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;/file&gt;'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> The above would allow the &lt;file/&gt; tag to be used from the base mode to enter a new mode (called file). If further modes should be applied while the Lexer is inside the file mode, these would need to be registered with the file mode. </p> <p> <strong>Note</strong>: there&#039;s no need to use pattern start and end delimiters. </p> </div> <h5 id="addpattern">addPattern</h5> <div class="level5"> <p> Used to trigger additional “tokens” inside an existing mode (no transitions). It accepts a pattern and the name of a mode it should be used inside. </p> <p> This is best seen from considering the list syntax in the parser. 
List syntax looks like this in DokuWiki; </p> <pre class="code">Before the list
  * Unordered List Item
  * Unordered List Item
  * Unordered List Item
After the list</pre> <p> Using <code>addPattern</code> it becomes possible to match the complete list at once while still exiting correctly and tokenizing each list item; </p> <pre class="code php"><span class="co1">// Match the opening list item and change mode</span>
<span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'\n {2,}[\*]'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'list'</span><span class="br0">&#41;</span><span class="sy0">;</span>
&nbsp;
<span class="co1">// Match new list items but stay in the list mode</span>
<span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addPattern</span><span class="br0">&#40;</span><span class="st_h">'\n {2,}[\*]'</span><span class="sy0">,</span><span class="st_h">'list'</span><span class="br0">&#41;</span><span class="sy0">;</span>
&nbsp;
<span class="co1">// If it's a linefeed that fails to match the above addPattern rule, exit the mode</span>
<span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'\n'</span><span class="sy0">,</span><span class="st_h">'list'</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> </div> <h5 id="addspecialpattern">addSpecialPattern</h5> <div class="level5"> <p> Used to enter a new mode just for the match then drop straight back into the “parent” mode. Accepts a pattern, a name of a mode it can be applied inside and the name of the “temporary” mode to enter for the match. Typically this would be used if you want to substitute wiki markup with something else. 
For example to match a smiley like :-) you might have; </p> <pre class="code php"><span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">':-)'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'smiley'</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> </div> <h5 id="maphandler">mapHandler</h5> <div class="level5"> <p> Allows a particular named mode to be mapped onto a method with a different name in the Handler. This may be useful when differing syntax should be handled in the same manner, such as the DokuWiki syntax for disabling other syntax inside a particular text block; </p> <pre class="code php"><span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;nowiki&gt;'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'unformatted'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'%%'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'unformattedalt'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;/nowiki&gt;'</span><span class="sy0">,</span><span class="st_h">'unformatted'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'%%'</span><span class="sy0">,</span><span 
class="st_h">'unformattedalt'</span><span class="br0">&#41;</span><span class="sy0">;</span>
&nbsp;
<span class="co1">// Both syntaxes should be handled the same way...</span>
<span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">mapHandler</span><span class="br0">&#40;</span><span class="st_h">'unformattedalt'</span><span class="sy0">,</span><span class="st_h">'unformatted'</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> </div> <h4 id="subpatterns_not_allowed">Subpatterns Not Allowed</h4> <div class="level4"> <p> Because the Lexer itself uses subpatterns (inside the <code>ParallelRegex</code> class), code <em>using</em> the Lexer cannot. This may take some getting used to but, generally, the <code>addPattern</code> method can be applied for solving the types of problems where subpatterns are typically applied. It has the advantage of keeping regexes simpler and thereby easier to manage. </p> </div> <h4 id="syntax_errors_and_state">Syntax Errors and State</h4> <div class="level4"> <p> To prevent “badly formed” (in particular a missing closing tag) markup causing the Lexer to enter a state (mode) which it never leaves, it can be useful to use a lookahead pattern to check for the closing markup first<sup><a href="#fn__9" id="fnt__9" class="fn_top">9)</a></sup>. 
For example; </p> <pre class="code php"><span class="co1">// Use lookahead in entry pattern...</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;file&gt;(?=.*&lt;/file&gt;)'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;/file&gt;'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> The entry pattern checks it can find a closing <code>&lt;/file&gt;</code> tag before it enters the state. </p> </div> <div class="secedit editbutton_section editbutton_3"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Lexer] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="lexer" /><input type="hidden" name="codeblockOffset" value="0" /><input type="hidden" name="range" value="4014-11418" /><button type="submit" title="Lexer">編集</button></div></form></div> <h3 class="sectionedit4" id="handler">Handler</h3> <div class="level3"> <p> Defined in <code>inc/parser/handler.php</code> </p> <p> The Handler is a class providing methods which are called by the Lexer as it matches tokens. It then “fine tunes” the tokens into a sequence of instructions ready for a Renderer. </p> <p> The Handler as a whole contains the following classes: </p> <ul> <li class="level1"><div class="li"> <code>Doku_Handler</code>: all calls from the Lexer are made to this class. 
For every mode registered with the Lexer, there will be a corresponding method in the Handler</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_CallWriter</code>: provides a layer between the array of instructions (the <code>Doku_Handler::$calls</code> array) and the Handler methods <em>writing</em> the instructions. It will be temporarily replaced with other objects, such as <code>Doku_Handler_List</code>, while lexing is in progress.</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_List</code>: responsible for transforming list tokens into instructions while lexing is still in progress</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Preformatted</code>: responsible for transforming preformatted tokens (indented text in dokuwiki) into instructions while lexing is still in progress</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Quote</code>: responsible for transforming blockquote tokens (text beginning with one or more &gt;) into instructions while lexing is still in progress</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Table</code>: responsible for transforming table tokens into instructions while lexing is still in progress</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Section</code>: responsible for inserting &#039;section&#039; instructions, based on the position of header instructions, once all lexing has finished - loops once through the complete list of instructions</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Block</code>: responsible for inserting &#039;p_open&#039; and &#039;p_close&#039; instructions, while being aware of &#039;block level&#039; instructions, once all lexing has finished (i.e. 
it loops once through the complete list of instructions and inserts more instructions)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Toc</code>: responsible for adding table of contents instructions at the start of the sequence, based on header instructions found, once all lexing has finished (i.e. it loops once through the complete list of instructions and inserts more instructions)</div> </li> </ul> </div> <h4 id="handler_token_methods">Handler Token Methods</h4> <div class="level4"> <p> The Handler must provide methods named corresponding to the modes registered with the Lexer (bearing in mind the Lexer <code>mapHandler()</code> method - see above). </p> <p> For example if you registered a file mode with the Lexer like; </p> <pre class="code php"><span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;file&gt;(?=.*&lt;/file&gt;)'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;/file&gt;'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> The Handler will need a method like; </p> <pre class="code php"><span class="kw2">class</span> Doku_Handler <span class="br0">&#123;</span> &nbsp; <span class="co4">/** * @param string match contains the text that was matched * @param int state - the type of match made (see below) * @param int pos - byte index where match was made */</span> <span class="kw2">function</span> <a href="http://www.php.net/file"><span class="kw3">file</span></a><span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span 
class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span></pre> <p> <strong>Note:</strong> a Handler method <em>must</em> return TRUE or the Lexer will halt immediately. This behaviour can be useful when dealing with other types of parsing problem but for the DokuWiki parser, all Handler methods will <em>always</em> return TRUE. </p> <p> The arguments provided to a handler method are; </p> <ul> <li class="level1"><div class="li"> <code>$match</code>: the text that was matched</div> </li> <li class="level1 node"><div class="li"> <code>$state</code>: this is a constant which describes how exactly the match was made;</div> <ol> <li class="level2"><div class="li"> <code>DOKU_LEXER_ENTER</code>: matched an entry pattern (see Lexer::addEntryPattern)</div> </li> <li class="level2"><div class="li"> <code>DOKU_LEXER_MATCHED</code>: matched a pattern (see Lexer::addPattern)</div> </li> <li class="level2"><div class="li"> <code>DOKU_LEXER_UNMATCHED</code>: some text found inside the mode which matched no patterns</div> </li> <li class="level2"><div class="li"> <code>DOKU_LEXER_EXIT</code>: matched an exit pattern (see Lexer::addExitPattern)</div> </li> <li class="level2"><div class="li"> <code>DOKU_LEXER_SPECIAL</code>: matched a special pattern (see Lexer::addSpecialPattern)</div> </li> </ol> </li> <li class="level1"><div class="li"> <code>$pos</code>: this is the byte index (strlen from start) where the <em>start</em> of the token was found. 
<code>$pos + strlen($match)</code> should give the byte index of the end of the match</div> </li> </ul> <p> As a more complex example, in the Parser the following is defined for matching lists; </p> <pre class="code php"> <span class="kw2">function</span> connectTo<span class="br0">&#40;</span><span class="re0">$mode</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'\n {2,}[\-\*]'</span><span class="sy0">,</span><span class="re0">$mode</span><span class="sy0">,</span><span class="st_h">'listblock'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="st_h">'\n\t{1,}[\-\*]'</span><span class="sy0">,</span><span class="re0">$mode</span><span class="sy0">,</span><span class="st_h">'listblock'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addPattern</span><span class="br0">&#40;</span><span class="st_h">'\n {2,}[\-\*]'</span><span class="sy0">,</span><span class="st_h">'listblock'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addPattern</span><span class="br0">&#40;</span><span class="st_h">'\n\t{1,}[\-\*]'</span><span class="sy0">,</span><span class="st_h">'listblock'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> postConnect<span 
class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'\n'</span><span class="sy0">,</span><span class="st_h">'listblock'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> <p> The <code>listblock</code> method in the Handler <sup><a href="#fn__10" id="fnt__10" class="fn_top">10)</a></sup>, looks like; </p> <pre class="code php"> <span class="kw2">function</span> listblock<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="kw1">switch</span> <span class="br0">&#40;</span> <span class="re0">$state</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// The start of the list...</span> <span class="kw1">case</span> DOKU_LEXER_ENTER<span class="sy0">:</span> <span class="co1">// Create the List rewriter, passing in the current CallWriter</span> <span class="re0">$ReWriter</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler_List<span class="br0">&#40;</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">CallWriter</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Replace the current CallWriter with the List rewriter</span> <span class="co1">// all incoming tokens (even if not list tokens)</span> <span class="co1">// are now diverted to the list</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">CallWriter</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span 
class="re0">$ReWriter</span><span class="sy0">;</span> &nbsp; <span class="re0">$this</span><span class="sy0">-&gt;</span>__addCall<span class="br0">&#40;</span><span class="st_h">'list_open'</span><span class="sy0">,</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="re0">$match</span><span class="br0">&#41;</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">break</span><span class="sy0">;</span> &nbsp; <span class="co1">// The end of the list</span> <span class="kw1">case</span> DOKU_LEXER_EXIT<span class="sy0">:</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__addCall<span class="br0">&#40;</span><span class="st_h">'list_close'</span><span class="sy0">,</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Tell the List rewriter to clean up</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">CallWriter</span><span class="sy0">-&gt;</span><span class="me1">process</span><span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Restore the old CallWriter</span> <span class="re0">$ReWriter</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">CallWriter</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">CallWriter</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="re0">$ReWriter</span><span class="sy0">-&gt;</span><span class="me1">CallWriter</span><span class="sy0">;</span> &nbsp; <span 
class="kw1">break</span><span class="sy0">;</span> &nbsp; <span class="kw1">case</span> DOKU_LEXER_MATCHED<span class="sy0">:</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__addCall<span class="br0">&#40;</span><span class="st_h">'list_item'</span><span class="sy0">,</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="re0">$match</span><span class="br0">&#41;</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">break</span><span class="sy0">;</span> &nbsp; <span class="kw1">case</span> DOKU_LEXER_UNMATCHED<span class="sy0">:</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__addCall<span class="br0">&#40;</span><span class="st_h">'cdata'</span><span class="sy0">,</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="re0">$match</span><span class="br0">&#41;</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">break</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> </div> <h4 id="token_conversion">Token Conversion</h4> <div class="level4"> <p> Part of the fine tuning, performed by the handler, involves inserting / renaming or removing tokens provided by the Lexer. 
</p> <p> For example, a list like; </p> <pre class="code">This is not a list * This is the opening list item * This is the second list item * This is the last list item This is also not a list</pre> <p> Would result in a sequence of tokens something like; </p> <ol> <li class="level1"><div class="li"><code>base: &quot;This is not a list&quot;, DOKU_LEXER_UNMATCHED</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot;\n *&quot;, DOKU_LEXER_ENTER</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot; This is the opening list item&quot;, DOKU_LEXER_UNMATCHED</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot;\n *&quot;, DOKU_LEXER_MATCHED</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot; This is the second list item&quot;, DOKU_LEXER_UNMATCHED</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot;\n *&quot;, DOKU_LEXER_MATCHED</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot; This is the last list item&quot;, DOKU_LEXER_UNMATCHED</code></div> </li> <li class="level1"><div class="li"><code>listblock: &quot;\n&quot;, DOKU_LEXER_EXIT</code></div> </li> <li class="level1"><div class="li"><code>base: &quot;This is also not a list&quot;, DOKU_LEXER_UNMATCHED</code></div> </li> </ol> <p> But to be useful to the Renderer, this has to be converted to the following instructions; </p> <ol> <li class="level1"><div class="li"><code>p_open:</code></div> </li> <li class="level1"><div class="li"><code>cdata: &quot;This is not a list&quot;</code></div> </li> <li class="level1"><div class="li"><code>p_close:</code></div> </li> <li class="level1"><div class="li"><code>listu_open:</code></div> </li> <li class="level1"><div class="li"><code>listitem_open:</code></div> </li> <li class="level1"><div class="li"><code>cdata: &quot; This is the opening list item&quot;</code></div> </li> <li class="level1"><div 
class="li"><code>listitem_close:</code></div> </li> <li class="level1"><div class="li"><code>listitem_open:</code></div> </li> <li class="level1"><div class="li"><code>cdata: &quot; This is the second list item&quot;</code></div> </li> <li class="level1"><div class="li"><code>listitem_close:</code></div> </li> <li class="level1"><div class="li"><code>listitem_open:</code></div> </li> <li class="level1"><div class="li"><code>cdata: &quot; This is the last list item&quot;</code></div> </li> <li class="level1"><div class="li"><code>listitem_close:</code></div> </li> <li class="level1"><div class="li"><code>listu_close:</code></div> </li> <li class="level1"><div class="li"><code>p_open:</code></div> </li> <li class="level1"><div class="li"><code>cdata: &quot;This is also not a list&quot;</code></div> </li> <li class="level1"><div class="li"><code>p_close:</code></div> </li> </ol> <p> In the case of lists, this requires the help of the <code>Doku_Handler_List</code> class, which has its own knowledge of state and captures the incoming tokens, replacing them with the correct instructions for a Renderer. </p> </div> <div class="secedit editbutton_section editbutton_4"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Handler] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="handler" /><input type="hidden" name="codeblockOffset" value="7" /><input type="hidden" name="range" value="11419-19158" /><button type="submit" title="Handler">編集</button></div></form></div> <h3 class="sectionedit5" id="parser">Parser</h3> <div class="level3"> <p> The Parser acts as the front end to external code and sets up the Lexer with the patterns and modes describing DokuWiki syntax. 
</p> <p> Using the Parser will generally look like: </p> <pre class="code php"><span class="co1">// Create the parser</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create the handler and store in the parser</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Add required syntax modes to parser</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'footnote'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Footnote<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'hr'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_HR<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'unformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Unformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="co2"># etc. 
</span> <span class="re0">$doc</span> <span class="sy0">=</span> <a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span><span class="st_h">'wikipage.txt.'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$instructions</span> <span class="sy0">=</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><span class="re0">$doc</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> More detailed examples are below. </p> <p> As a whole, the Parser also contains classes representing each available syntax mode, the base class for all of these being <code>Doku_Parser_Mode</code>. The behaviour of these modes is best understood by looking at the examples of adding syntax later in this document. </p> <p> The <em>reason</em> for representing the modes with classes is to avoid repeated calls to the Lexer methods. 
Without them it would be necessary to hard code each pattern rule for every mode that pattern can be matched in, for example, registering a single pattern rule for the CamelCase link syntax would require something like; </p> <pre class="code php"><span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span class="st_h">'base'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span class="st_h">'footnote'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span class="st_h">'table'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span class="st_h">'listblock'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span 
class="st_h">'strong'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span class="st_h">'underline'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="co1">// etc.</span></pre> <p> Each mode that is allowed to contain CamelCase links must be explicitly named. </p> <p> Rather than hard coding this, instead it is implemented using a single class like; </p> <pre class="code php"><span class="kw2">class</span> Doku_Parser_Mode_CamelCaseLink <span class="kw2">extends</span> Doku_Parser_Mode <span class="br0">&#123;</span> &nbsp; <span class="kw2">function</span> connectTo<span class="br0">&#40;</span><span class="re0">$mode</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span> <span class="st_h">'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b'</span><span class="sy0">,</span><span class="re0">$mode</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span> <span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="br0">&#125;</span></pre> <p> When setting up the Lexer, the Parser calls the <code>connectTo</code> method on the <code>Doku_Parser_Mode_CamelCaseLink</code> object for every other mode which accepts the CamelCase syntax (some don&#039;t such as the <code>&lt;code /&gt;</code> syntax). </p> <p> At the expense of making the Lexer setup harder to understand, this allows the code to be more flexible when adding new syntax. 
</p> </div> <div class="secedit editbutton_section editbutton_5"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Parser] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="parser" /><input type="hidden" name="codeblockOffset" value="12" /><input type="hidden" name="range" value="19159-21776" /><button type="submit" title="Parser">編集</button></div></form></div> <h3 class="sectionedit6" id="instructions_data_format">Instructions Data Format</h3> <div class="level3"> <p> The following shows an example of raw wiki text and the corresponding output from the parser; </p> <p> The raw text (contains a table); </p> <pre class="code">abc | Row 0 Col 1 | Row 0 Col 2 | Row 0 Col 3 | | Row 1 Col 1 | Row 1 Col 2 | Row 1 Col 3 | def</pre> <p> When parsed the following PHP array is returned (described below); </p> <pre class="code">Array ( [0] =&gt; Array ( [0] =&gt; document_start [1] =&gt; Array ( ) [2] =&gt; 0 ) [1] =&gt; Array ( [0] =&gt; p_open [1] =&gt; Array ( ) [2] =&gt; 0 ) [2] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; abc ) [2] =&gt; 0 ) [3] =&gt; Array ( [0] =&gt; p_close [1] =&gt; Array ( ) [2] =&gt; 5 ) [4] =&gt; Array ( [0] =&gt; table_open [1] =&gt; Array ( [0] =&gt; 3 [1] =&gt; 2 ) [2] =&gt; 5 ) [5] =&gt; Array ( [0] =&gt; tablerow_open [1] =&gt; Array ( ) [2] =&gt; 5 ) [6] =&gt; Array ( [0] =&gt; tablecell_open [1] =&gt; Array ( [0] =&gt; 1 [1] =&gt; left ) [2] =&gt; 5 ) [7] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; Row 0 Col 1 ) [2] =&gt; 7 ) [8] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; ) [2] =&gt; 19 ) [9] =&gt; Array ( [0] =&gt; tablecell_close [1] =&gt; Array ( ) [2] =&gt; 23 ) [10] =&gt; Array ( [0] =&gt; tablecell_open [1] =&gt; Array ( [0] =&gt; 1 [1] =&gt; left ) [2] =&gt; 23 ) [11] 
=&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; Row 0 Col 2 ) [2] =&gt; 24 ) [12] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; ) [2] =&gt; 36 ) [13] =&gt; Array ( [0] =&gt; tablecell_close [1] =&gt; Array ( ) [2] =&gt; 41 ) [14] =&gt; Array ( [0] =&gt; tablecell_open [1] =&gt; Array ( [0] =&gt; 1 [1] =&gt; left ) [2] =&gt; 41 ) [15] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; Row 0 Col 3 ) [2] =&gt; 42 ) [16] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; ) [2] =&gt; 54 ) [17] =&gt; Array ( [0] =&gt; tablecell_close [1] =&gt; Array ( ) [2] =&gt; 62 ) [18] =&gt; Array ( [0] =&gt; tablerow_close [1] =&gt; Array ( ) [2] =&gt; 63 ) [19] =&gt; Array ( [0] =&gt; tablerow_open [1] =&gt; Array ( ) [2] =&gt; 63 ) [20] =&gt; Array ( [0] =&gt; tablecell_open [1] =&gt; Array ( [0] =&gt; 1 [1] =&gt; left ) [2] =&gt; 63 ) [21] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; Row 1 Col 1 ) [2] =&gt; 65 ) [22] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; ) [2] =&gt; 77 ) [23] =&gt; Array ( [0] =&gt; tablecell_close [1] =&gt; Array ( ) [2] =&gt; 81 ) [24] =&gt; Array ( [0] =&gt; tablecell_open [1] =&gt; Array ( [0] =&gt; 1 [1] =&gt; left ) [2] =&gt; 81 ) [25] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; Row 1 Col 2 ) [2] =&gt; 82 ) [26] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; ) [2] =&gt; 94 ) [27] =&gt; Array ( [0] =&gt; tablecell_close [1] =&gt; Array ( ) [2] =&gt; 99 ) [28] =&gt; Array ( [0] =&gt; tablecell_open [1] =&gt; Array ( [0] =&gt; 1 [1] =&gt; left ) [2] =&gt; 99 ) [29] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; Row 1 Col 3 ) [2] =&gt; 100 ) [30] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; ) [2] =&gt; 112 ) [31] =&gt; Array ( [0] =&gt; tablecell_close [1] =&gt; Array ( ) [2] =&gt; 120 ) [32] =&gt; Array ( [0] =&gt; tablerow_close [1] =&gt; Array ( ) [2] =&gt; 121 ) [33] =&gt; Array ( [0] =&gt; table_close [1] =&gt; Array ( ) [2] =&gt; 121 ) 
[34] =&gt; Array ( [0] =&gt; p_open [1] =&gt; Array ( ) [2] =&gt; 121 ) [35] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; def ) [2] =&gt; 122 ) [36] =&gt; Array ( [0] =&gt; p_close [1] =&gt; Array ( ) [2] =&gt; 122 ) [37] =&gt; Array ( [0] =&gt; document_end [1] =&gt; Array ( ) [2] =&gt; 122 ) )</pre> <p> The top level array is simply a list. Each of its child elements describes a callback function to be executed against the Renderer (see description of the Renderer below) as well as the byte index in the raw input text where that particular “element” of wiki syntax was found. </p> </div> <h4 id="a_single_instruction">A Single Instruction</h4> <div class="level4"> <p> Considering a single child element (which represents a single instruction) from the above list of instructions; </p> <pre class="code"> [35] =&gt; Array ( [0] =&gt; cdata [1] =&gt; Array ( [0] =&gt; def ) [2] =&gt; 122 )</pre> <p> The first element (index 0) is the name of a method or function in the Renderer to execute. </p> <p> The second element (index 1) is itself an array, each of <em>its</em> elements being the arguments for the Renderer method that will be called. </p> <p> In this case there is a single argument with the value <code>&quot;def\n&quot;</code>, so the method call would look like; </p> <pre class="code php"><span class="re0">$Render</span><span class="sy0">-&gt;</span><span class="me1">cdata</span><span class="br0">&#40;</span><span class="st0">&quot;def<span class="es1">\n</span>&quot;</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> The third element (index 2) is the byte index of the first character that “triggered” this instruction in the raw text document. It should be the same as the value returned by PHP&#039;s <a href="https://secure.php.net/strpos" class="interwiki iw_phpfn" title="https://secure.php.net/strpos">strpos</a> function. 
This can be used to retrieve sections of the raw wiki text, based on the positions of the instructions generated from it (example later). </p> <p> <strong>Note:</strong> The Parser&#039;s <code>parse</code> method pads the raw wiki text with a preceding and following linefeed character, to make sure particular Lexer states exit correctly, so you may need to subtract 1 from the byte index to get the correct location in the original raw wiki text. The Parser also normalizes linefeeds to Unix style (i.e. all <code>\r\n</code> become <code>\n</code>) so the document the Lexer sees may be smaller than the one you actually fed it. </p> <p> An example of the instruction array of the <a href="/ja:wiki:syntax" class="wikilink1" title="ja:wiki:syntax" data-wiki-id="ja:wiki:syntax">syntax</a> page can be found <a href="/devel:parser:sample_instructions" class="wikilink1" title="devel:parser:sample_instructions" data-wiki-id="devel:parser:sample_instructions">here</a> </p> </div> <div class="secedit editbutton_section editbutton_6"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Instructions Data Format] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="instructions_data_format" /><input type="hidden" name="codeblockOffset" value="15" /><input type="hidden" name="range" value="21777-30828" /><button type="submit" title="Instructions Data Format">編集</button></div></form></div> <h3 class="sectionedit7" id="renderer">Renderer</h3> <div class="level3"> <p> The Renderer is a class (a collection of functions can also be used) which you define. 
The interface is defined in <code>inc/parser/renderer.php</code> and looks like; </p> <pre class="code php"><span class="kw2">&lt;?php</span> <span class="kw2">class</span> Doku_Renderer <span class="br0">&#123;</span> &nbsp; <span class="co1">// snip</span> &nbsp; <span class="kw2">function</span> <a href="http://www.php.net/header"><span class="kw3">header</span></a><span class="br0">&#40;</span><span class="re0">$text</span><span class="sy0">,</span> <span class="re0">$level</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> section_open<span class="br0">&#40;</span><span class="re0">$level</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> section_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> cdata<span class="br0">&#40;</span><span class="re0">$text</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> p_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> p_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> linebreak<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> hr<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="co1">// snip</span> <span class="br0">&#125;</span></pre> <p> It is used to document the Renderer although it could also be 
extended if you wanted to write a Renderer which only captures certain calls. </p> <p> The basic principle for how the instructions, returned from the parser, are used against a Renderer is similar to the notion of a <a href="https://en.wikipedia.org/wiki/SAX_XML_API" class="interwiki iw_wp" title="https://en.wikipedia.org/wiki/SAX_XML_API">SAX XML API</a> - the instructions are a list of function / method names and their arguments. Looping through the list of instructions, each instruction can be called against the Renderer (i.e. the methods provided by the Renderer are <a href="https://en.wikipedia.org/wiki/Callback_(computer_science)" class="interwiki iw_wp" title="https://en.wikipedia.org/wiki/Callback_(computer_science)">callbacks</a>). Unlike the SAX <abbr title="Application Programming Interface">API</abbr>, where only a few, fairly general, callbacks are available (e.g. tag_start, tag_end, cdata etc.), the Renderer defines a more explicit <abbr title="Application Programming Interface">API</abbr>, where the methods typically correspond one-to-one with the act of generating the output. In the section of the Renderer shown above, the <code>p_open</code> and <code>p_close</code> methods would be used to output the tags <code>&lt;p&gt;</code> and <code>&lt;/p&gt;</code> in XHTML, respectively, while the <code>header</code> function takes two arguments - some text to display and the “level” of the header so a call like <code>header(&#039;Some Title&#039;,1)</code> would be output in XHTML like <code>&lt;h1&gt;Some Title&lt;/h1&gt;</code>. </p> </div> <h4 id="invoking_the_renderer_with_instructions">Invoking the Renderer with Instructions</h4> <div class="level4"> <p> It is left up to the client code using the Parser to execute the list of instructions against a Renderer. 
Typically this will be done using PHP&#039;s <a href="https://secure.php.net/call_user_func_array" class="interwiki iw_phpfn" title="https://secure.php.net/call_user_func_array">call_user_func_array</a> function. For example; </p> <pre class="code php"><span class="co1">// Get a list of instructions from the parser</span> <span class="re0">$instructions</span> <span class="sy0">=</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><span class="re0">$rawDoc</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create a renderer</span> <span class="re0">$Renderer</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Renderer_XHTML<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Loop through the instructions</span> <span class="kw1">foreach</span> <span class="br0">&#40;</span> <span class="re0">$instructions</span> <span class="kw1">as</span> <span class="re0">$instruction</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Execute the callback against the Renderer</span> <a href="http://www.php.net/call_user_func_array"><span class="kw3">call_user_func_array</span></a><span class="br0">&#40;</span><a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="sy0">&amp;</span><span class="re0">$Renderer</span><span class="sy0">,</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">,</span><span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> </div> <h4 
id="renderer_link_methods">Renderer Link Methods</h4> <div class="level4"> <p> The key Renderer methods for handling the different kinds of link are; </p> <ul> <li class="level1 node"><div class="li"> <code>function camelcaselink($link) {} // $link like &quot;SomePage&quot;</code></div> <ul> <li class="level2"><div class="li">This can probably be ignored for spam checking - it shouldn&#039;t be possible for someone to link offsite with this syntax</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function internallink($link, $title = NULL) {} // $link like &quot;[[syntax]]&quot;</code></div> <ul> <li class="level2"><div class="li">Although <code>$link</code> itself is internal, <code>$title</code> could be an image which is offsite, so needs checking</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function externallink($link, $title = NULL) {}</code></div> <ul> <li class="level2"><div class="li">Both <code>$link</code> and <code>$title</code> (images) need checking</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function interwikilink($link, $title = NULL, $wikiName, $wikiUri) {}</code></div> <ul> <li class="level2"><div class="li">The <code>$title</code> needs checking for images</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function filelink($link, $title = NULL) {}</code></div> <ul> <li class="level2"><div class="li">Technically only valid <code>file://</code> URLs should match but probably best to check anyway plus <code>$title</code> may be an offsite image</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function windowssharelink($link, $title = NULL) {}</code></div> <ul> <li class="level2"><div class="li">Should only match valid Windows share URLs but check anyway plus <code>$title</code> for images</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function email($address, $title = NULL) {}</code></div> <ul> <li 
class="level2"><div class="li"><code>$title</code> could be an image. Check the email as well?</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function internalmedialink ($src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL) {}</code></div> <ul> <li class="level2"><div class="li">This shouldn&#039;t need checking - should only link to local images. <code>$title</code> itself cannot be an image</div> </li> </ul> </li> <li class="level1 node"><div class="li"> <code>function externalmedialink($src,$title=NULL,$align=NULL,$width=NULL,$height=NULL,$cache=NULL) {}</code></div> <ul> <li class="level2"><div class="li"><code>$src</code> needs checking</div> </li> </ul> </li> </ul> <p> Special attention is required for methods which take the <code>$title</code> argument, which represents the visible text of the link, for example; </p> <pre class="code html4strict"><span class="sc2">&lt;<a href="http://december.com/html/4/element/a.html"><span class="kw2">a</span></a> <span class="kw3">href</span><span class="sy0">=</span><span class="st0">&quot;http://www.example.com&quot;</span>&gt;</span>This is the title<span class="sc2">&lt;<span class="sy0">/</span><a href="http://december.com/html/4/element/a.html"><span class="kw2">a</span></a>&gt;</span></pre> <p> The <code>$title</code> argument can have three possible types of value; </p> <ol> <li class="level1"><div class="li"> <code>NULL</code>: no title was provided in the wiki document.</div> </li> <li class="level1"><div class="li"> string: a plain text string was used as the title</div> </li> <li class="level1"><div class="li"> array (hash): an image was used as the title.</div> </li> </ol> <p> If the <code>$title</code> is an array, it will contain associative values describing the image; </p> <pre class="code php"><span class="re0">$title</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span 
class="co1">// Could be 'internalmedia' (local image) or 'externalmedia' (offsite image)</span> <span class="st_h">'type'</span><span class="sy0">=&gt;</span><span class="st_h">'internalmedia'</span><span class="sy0">,</span> &nbsp; <span class="co1">// The URL to the image (may be a wiki URL or http://static.example.com/img.png)</span> <span class="st_h">'src'</span><span class="sy0">=&gt;</span><span class="st_h">'wiki:php-powered.png'</span><span class="sy0">,</span> &nbsp; <span class="co1">// For the alt attribute - a string or NULL</span> <span class="st_h">'title'</span><span class="sy0">=&gt;</span><span class="st_h">'Powered by PHP'</span><span class="sy0">,</span> &nbsp; <span class="co1">// 'left', 'right', 'center' or NULL</span> <span class="st_h">'align'</span><span class="sy0">=&gt;</span><span class="st_h">'right'</span><span class="sy0">,</span> &nbsp; <span class="co1">// Width in pixels or NULL</span> <span class="st_h">'width'</span><span class="sy0">=&gt;</span> <span class="nu0">50</span><span class="sy0">,</span> &nbsp; <span class="co1">// Height in pixels or NULL</span> <span class="st_h">'height'</span><span class="sy0">=&gt;</span><span class="nu0">75</span><span class="sy0">,</span> &nbsp; <span class="co1">// Whether to cache the image (for external images)</span> <span class="st_h">'cache'</span><span class="sy0">=&gt;</span><span class="kw4">FALSE</span><span class="sy0">,</span> <span class="br0">&#41;</span><span class="sy0">;</span></pre> </div> <div class="secedit editbutton_section editbutton_7"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Renderer] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="renderer" /><input type="hidden" name="codeblockOffset" value="19" /><input type="hidden" name="range" 
value="30829-35892" /><button type="submit" title="Renderer">編集</button></div></form></div> <h2 class="sectionedit8" id="examples">Examples</h2> <div class="level2"> <p> The following examples show common tasks that would likely be performed with the parser, as well as raising performance considerations and notes on extending syntax. </p> </div> <div class="secedit editbutton_section editbutton_8"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Examples] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="examples" /><input type="hidden" name="codeblockOffset" value="23" /><input type="hidden" name="range" value="35893-36081" /><button type="submit" title="Examples">編集</button></div></form></div> <h3 class="sectionedit9" id="basic_invokation">Basic Invokation</h3> <div class="level3"> <p> To invoke the parser with <em>all</em> current modes, and parse the DokuWiki syntax document; </p> <pre class="code php"><span class="kw1">require_once</span> DOKU_INC <span class="sy0">.</span> <span class="st_h">'parser/parser.php'</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create the parser</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Add the Handler</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load all the modes</span> <span class="re0">$Parser</span><span 
class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'listblock'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ListBlock<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'preformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Preformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'notoc'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_NoToc<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'header'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Header<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'table'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Table<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$formats</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a> <span class="br0">&#40;</span> <span class="st_h">'strong'</span><span class="sy0">,</span> <span 
class="st_h">'emphasis'</span><span class="sy0">,</span> <span class="st_h">'underline'</span><span class="sy0">,</span> <span class="st_h">'monospace'</span><span class="sy0">,</span> <span class="st_h">'subscript'</span><span class="sy0">,</span> <span class="st_h">'superscript'</span><span class="sy0">,</span> <span class="st_h">'deleted'</span><span class="sy0">,</span> <span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">foreach</span> <span class="br0">&#40;</span> <span class="re0">$formats</span> <span class="kw1">as</span> <span class="re0">$format</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="re0">$format</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Formatting<span class="br0">&#40;</span><span class="re0">$format</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'linebreak'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Linebreak<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'footnote'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Footnote<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'hr'</span><span class="sy0">,</span><span 
class="kw2">new</span> Doku_Parser_Mode_HR<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'unformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Unformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'php'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_PHP<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'html'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_HTML<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'code'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Code<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'file'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_File<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span 
class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'quote'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Quote<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// These need data files. The get* functions are left to your imagination</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'acronym'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Acronym<span class="br0">&#40;</span><a href="http://www.php.net/array_keys"><span class="kw3">array_keys</span></a><span class="br0">&#40;</span>getAcronyms<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'wordblock'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Wordblock<span class="br0">&#40;</span><a href="http://www.php.net/array_keys"><span class="kw3">array_keys</span></a><span class="br0">&#40;</span>getBadWords<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'smiley'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Smiley<span class="br0">&#40;</span><a href="http://www.php.net/array_keys"><span class="kw3">array_keys</span></a><span class="br0">&#40;</span>getSmileys<span class="br0">&#40;</span><span class="br0">&#41;</span><span 
class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'entity'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Entity<span class="br0">&#40;</span><a href="http://www.php.net/array_keys"><span class="kw3">array_keys</span></a><span class="br0">&#40;</span>getEntities<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'multiplyentity'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_MultiplyEntity<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'quotes'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Quotes<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'camelcaselink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_CamelCaseLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'internallink'</span><span class="sy0">,</span><span class="kw2">new</span> 
Doku_Parser_Mode_InternalLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'media'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Media<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'externallink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ExternalLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'emaillink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_EmailLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'windowssharelink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_WindowsShareLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'filelink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_FileLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span 
class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'eol'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Eol<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Loads the raw wiki document</span> <span class="re0">$doc</span> <span class="sy0">=</span> <a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span>DOKU_DATA <span class="sy0">.</span> <span class="st_h">'wiki/syntax.txt'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Get a list of instructions</span> <span class="re0">$instructions</span> <span class="sy0">=</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><span class="re0">$doc</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create a renderer</span> <span class="kw1">require_once</span> DOKU_INC <span class="sy0">.</span> <span class="st_h">'parser/xhtml.php'</span><span class="sy0">;</span> <span class="re0">$Renderer</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Renderer_XHTML<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co2"># Load data like smileys into the Renderer here </span> <span class="co1">// Loop through the instructions</span> <span class="kw1">foreach</span> <span class="br0">&#40;</span> <span class="re0">$instructions</span> <span class="kw1">as</span> <span class="re0">$instruction</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Execute the callback against the Renderer</span> <a href="http://www.php.net/call_user_func_array"><span 
class="kw3">call_user_func_array</span></a><span class="br0">&#40;</span><a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="sy0">&amp;</span><span class="re0">$Renderer</span><span class="sy0">,</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">,</span><span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// Display the output</span> <span class="kw1">echo</span> <span class="re0">$Renderer</span><span class="sy0">-&gt;</span><span class="me1">doc</span><span class="sy0">;</span></pre> </div> <div class="secedit editbutton_section editbutton_9"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Basic Invokation] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="basic_invokation" /><input type="hidden" name="codeblockOffset" value="23" /><input type="hidden" name="range" value="36082-39090" /><button type="submit" title="Basic Invokation">編集</button></div></form></div> <h3 class="sectionedit10" id="selecting_text_for_sections">Selecting Text (for sections)</h3> <div class="level3"> <p> The following shows how to select a range of text from the raw document using instructions from the parser; </p> <pre class="code php"><span class="co1">// Create the parser</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> 
&nbsp; <span class="co1">// Add the Handler</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load the header mode to find headers</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'header'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Header<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load the modes which could contain markup that might be</span> <span class="co1">// mistaken for a header</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'listblock'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ListBlock<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'preformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Preformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'table'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Table<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span 
class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'unformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Unformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'php'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_PHP<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'html'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_HTML<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'code'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Code<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'file'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_File<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'quote'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Quote<span 
class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'footnote'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Footnote<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'internallink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_InternalLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'media'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Media<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'externallink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ExternalLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'email'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Email<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span 
class="br0">&#40;</span><span class="st_h">'windowssharelink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_WindowsShareLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'filelink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_FileLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Loads the raw wiki document</span> <span class="re0">$doc</span> <span class="sy0">=</span> <a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span>DOKU_DATA <span class="sy0">.</span> <span class="st_h">'wiki/syntax.txt'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Get a list of instructions</span> <span class="re0">$instructions</span> <span class="sy0">=</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><span class="re0">$doc</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Use this to watch when we're inside the section we want</span> <span class="re0">$inSection</span> <span class="sy0">=</span> <span class="kw4">FALSE</span><span class="sy0">;</span> <span class="re0">$startPos</span> <span class="sy0">=</span> <span class="nu0">0</span><span class="sy0">;</span> <span class="re0">$endPos</span> <span class="sy0">=</span> <span class="nu0">0</span><span class="sy0">;</span> &nbsp; <span class="co1">// Loop through the instructions</span> <span class="kw1">foreach</span> <span class="br0">&#40;</span> <span class="re0">$instructions</span> <span class="kw1">as</span> <span 
class="re0">$instruction</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="sy0">!</span><span class="re0">$inSection</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Look for the header for the &quot;Lists&quot; heading</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span> <span class="sy0">==</span> <span class="st_h">'header'</span> <span class="sy0">&amp;&amp;</span> <a href="http://www.php.net/trim"><span class="kw3">trim</span></a><span class="br0">&#40;</span><span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span><span class="br0">&#41;</span> <span class="sy0">==</span> <span class="st_h">'Lists'</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$startPos</span> <span class="sy0">=</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">2</span><span class="br0">&#93;</span><span class="sy0">;</span> <span class="re0">$inSection</span> <span class="sy0">=</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> <span class="kw1">else</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Look for the end of the section</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span> <span class="sy0">==</span> <span class="st_h">'section_close'</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$endPos</span> 
<span class="sy0">=</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">2</span><span class="br0">&#93;</span><span class="sy0">;</span> <span class="kw1">break</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// Normalize and pad the document in the same way the parser does</span> <span class="co1">// so that byte indexes will match</span> <span class="re0">$doc</span> <span class="sy0">=</span> <span class="st0">&quot;<span class="es1">\n</span>&quot;</span><span class="sy0">.</span><a href="http://www.php.net/str_replace"><span class="kw3">str_replace</span></a><span class="br0">&#40;</span><span class="st0">&quot;<span class="es1">\r</span><span class="es1">\n</span>&quot;</span><span class="sy0">,</span><span class="st0">&quot;<span class="es1">\n</span>&quot;</span><span class="sy0">,</span><span class="re0">$doc</span><span class="br0">&#41;</span><span class="sy0">.</span><span class="st0">&quot;<span class="es1">\n</span>&quot;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Get the text before the section we want</span> <span class="re0">$before</span> <span class="sy0">=</span> <a href="http://www.php.net/substr"><span class="kw3">substr</span></a><span class="br0">&#40;</span><span class="re0">$doc</span><span class="sy0">,</span> <span class="nu0">0</span><span class="sy0">,</span> <span class="re0">$startPos</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$section</span> <span class="sy0">=</span> <a href="http://www.php.net/substr"><span class="kw3">substr</span></a><span class="br0">&#40;</span><span class="re0">$doc</span><span class="sy0">,</span> <span class="re0">$startPos</span><span class="sy0">,</span> <span class="br0">&#40;</span><span class="re0">$endPos</span><span class="sy0">-</span><span class="re0">$startPos</span><span 
class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$after</span> <span class="sy0">=</span> <a href="http://www.php.net/substr"><span class="kw3">substr</span></a><span class="br0">&#40;</span><span class="re0">$doc</span><span class="sy0">,</span> <span class="re0">$endPos</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> </div> <div class="secedit editbutton_section editbutton_10"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Selecting Text (for sections)] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="selecting_text_for_sections" /><input type="hidden" name="codeblockOffset" value="24" /><input type="hidden" name="range" value="39091-41686" /><button type="submit" title="Selecting Text (for sections)">編集</button></div></form></div> <h3 class="sectionedit11" id="managing_data_file_input_for_patterns">Managing Data File Input for Patterns</h3> <div class="level3"> <p> DokuWiki stores parts of some patterns in external data files (e.g. the smileys). Because the parsing and output of the document are now separate stages, handled by different components, a different approach is required for using this data, compared to earlier parser versions. </p> <p> For the relevant modes, each accepts a plain list of elements which it builds into a list of patterns for registering with the Lexer. 
</p> <p> For example: </p> <pre class="code php"><span class="co1">// A plain list of smiley tokens...</span> <span class="re0">$smileys</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span class="st_h">':-)'</span><span class="sy0">,</span> <span class="st_h">':-('</span><span class="sy0">,</span> <span class="st_h">';-)'</span><span class="sy0">,</span> <span class="co1">// etc.</span> <span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create the mode</span> <span class="re0">$SmileyMode</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser_Mode_Smiley<span class="br0">&#40;</span><span class="re0">$smileys</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Add it to the parser</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'smiley'</span><span class="sy0">,</span><span class="re0">$SmileyMode</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> The parser is not interested in the output format for the smileys. </p> <p> The other modes this applies to are defined by the classes: </p> <ul> <li class="level1"><div class="li"> <code>Doku_Parser_Mode_Acronym</code> - for acronyms</div> </li> <li class="level1"><div class="li"> <code>Doku_Parser_Mode_Wordblock</code> - to block specific words (e.g. bad language)</div> </li> <li class="level1"><div class="li"> <code>Doku_Parser_Mode_Entity</code> - for typography</div> </li> </ul> <p> Each accepts a list of “interesting strings” in its constructor, in the same way as the smileys. 
</p> <p> In practice it is probably worth defining functions for retrieval of the data from the configuration files and storing the associative arrays in a static value e.g.; </p> <pre class="code php"><span class="kw2">function</span> getSmileys<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; static <span class="re0">$smileys</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="sy0">;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="sy0">!</span><span class="re0">$smileys</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$smileys</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$lines</span> <span class="sy0">=</span> <a href="http://www.php.net/file"><span class="kw3">file</span></a><span class="br0">&#40;</span> DOKU_CONF <span class="sy0">.</span> <span class="st_h">'smileys.conf'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw1">foreach</span><span class="br0">&#40;</span><span class="re0">$lines</span> <span class="kw1">as</span> <span class="re0">$line</span><span class="br0">&#41;</span><span class="br0">&#123;</span> &nbsp; <span class="co1">//ignore comments</span> <span class="re0">$line</span> <span class="sy0">=</span> <a href="http://www.php.net/preg_replace"><span class="kw3">preg_replace</span></a><span class="br0">&#40;</span><span class="st_h">'/#.*$/'</span><span class="sy0">,</span><span class="st_h">''</span><span class="sy0">,</span><span class="re0">$line</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="re0">$line</span> <span class="sy0">=</span> <a href="http://www.php.net/trim"><span class="kw3">trim</span></a><span class="br0">&#40;</span><span 
class="re0">$line</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw1">if</span><span class="br0">&#40;</span><a href="http://www.php.net/empty"><span class="kw3">empty</span></a><span class="br0">&#40;</span><span class="re0">$line</span><span class="br0">&#41;</span><span class="br0">&#41;</span> <span class="kw1">continue</span><span class="sy0">;</span> &nbsp; <span class="re0">$smiley</span> <span class="sy0">=</span> <a href="http://www.php.net/preg_split"><span class="kw3">preg_split</span></a><span class="br0">&#40;</span><span class="st_h">'/\s+/'</span><span class="sy0">,</span><span class="re0">$line</span><span class="sy0">,</span><span class="nu0">2</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Build the associative array</span> <span class="re0">$smileys</span><span class="br0">&#91;</span><span class="re0">$smiley</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span><span class="br0">&#93;</span> <span class="sy0">=</span> <span class="re0">$smiley</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw1">return</span> <span class="re0">$smileys</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> <p> This function can now be used like; </p> <pre class="code php"><span class="co1">// Load the smiley patterns into the mode</span> <span class="re0">$SmileyMode</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser_Mode_Smiley<span class="br0">&#40;</span><a href="http://www.php.net/array_keys"><span class="kw3">array_keys</span></a><span class="br0">&#40;</span>getSmileys<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> 
<pre class="code php"><span class="co1">// Load the associative array in a renderer for lookup on output</span> <span class="re0">$Renderer</span><span class="sy0">-&gt;</span><span class="me1">smileys</span> <span class="sy0">=</span> getSmileys<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> <strong>Note:</strong> Checking for links which should be blocked is handled in a separate manner, as described below. </p> </div> <div class="secedit editbutton_section editbutton_11"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Managing Data File Input for Patterns] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="managing_data_file_input_for_patterns" /><input type="hidden" name="codeblockOffset" value="25" /><input type="hidden" name="range" value="41687-44043" /><button type="submit" title="Managing Data File Input for Patterns">編集</button></div></form></div> <h3 class="sectionedit12" id="testing_links_for_spam">Testing Links for Spam</h3> <div class="level3"> <p> Ideally we want to be able to check for links to spam <em>before</em> storing a document (after editing). </p> <blockquote><div class="no"> This example should be viewed with caution. It makes a useful point of reference, but actual testing has since shown it to be very slow - it is probably easier just to use a simple function that is “syntax blind” but searches the entire document for links which match the blacklist. Meanwhile this example could be useful as a basis for building a &#039;wiki map&#039; or finding &#039;wanted pages&#039; by examining internal links. 
Probably best run as a cron job</div></blockquote> <p> This could be done by building a special Renderer that examines only the link-related callbacks and checks the <abbr title="Uniform Resource Locator">URL</abbr> against a blacklist. </p> <p> A function is needed to load the <code>spam.conf</code> and bundle it into a single regex; </p> <blockquote><div class="no"> Recently tested this approach (single regex) against the latest blacklist from <a href="http://blacklist.chongqed.org/" class="urlextern" title="http://blacklist.chongqed.org/">http://blacklist.chongqed.org/</a> and got errors about the final regex being too big. This should probably split the regex into smaller pieces and return them as an array</div></blockquote> <pre class="code php"><span class="kw2">function</span> getSpamPattern<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> static <span class="re0">$spamPattern</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="sy0">;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span> <a href="http://www.php.net/is_null"><span class="kw3">is_null</span></a><span class="br0">&#40;</span><span class="re0">$spamPattern</span><span class="br0">&#41;</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$lines</span> <span class="sy0">=</span> <span class="sy0">@</span><a href="http://www.php.net/file"><span class="kw3">file</span></a><span class="br0">&#40;</span>DOKU_CONF <span class="sy0">.</span> <span class="st_h">'spam.conf'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="sy0">!</span><span class="re0">$lines</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$spamPattern</span> <span class="sy0">=</span> <span class="st_h">''</span><span class="sy0">;</span> &nbsp; <span 
class="br0">&#125;</span> <span class="kw1">else</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$spamPattern</span> <span class="sy0">=</span> <span class="st_h">'#'</span><span class="sy0">;</span> <span class="re0">$sep</span> <span class="sy0">=</span> <span class="st_h">''</span><span class="sy0">;</span> &nbsp; <span class="kw1">foreach</span><span class="br0">&#40;</span><span class="re0">$lines</span> <span class="kw1">as</span> <span class="re0">$line</span><span class="br0">&#41;</span><span class="br0">&#123;</span> &nbsp; <span class="co1">// Strip comments</span> <span class="re0">$line</span> <span class="sy0">=</span> <a href="http://www.php.net/preg_replace"><span class="kw3">preg_replace</span></a><span class="br0">&#40;</span><span class="st_h">'/#.*$/'</span><span class="sy0">,</span><span class="st_h">''</span><span class="sy0">,</span><span class="re0">$line</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Ignore blank lines</span> <span class="re0">$line</span> <span class="sy0">=</span> <a href="http://www.php.net/trim"><span class="kw3">trim</span></a><span class="br0">&#40;</span><span class="re0">$line</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">if</span><span class="br0">&#40;</span><a href="http://www.php.net/empty"><span class="kw3">empty</span></a><span class="br0">&#40;</span><span class="re0">$line</span><span class="br0">&#41;</span><span class="br0">&#41;</span> <span class="kw1">continue</span><span class="sy0">;</span> &nbsp; <span class="re0">$spamPattern</span><span class="sy0">.=</span> <span class="re0">$sep</span><span class="sy0">.</span><span class="re0">$line</span><span class="sy0">;</span> &nbsp; <span class="re0">$sep</span> <span class="sy0">=</span> <span class="st_h">'|'</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="re0">$spamPattern</span> <span class="sy0">.=</span> <span 
class="st_h">'#si'</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw1">return</span> <span class="re0">$spamPattern</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> <p> Now we need to extend the base Renderer with one that will examine links only; </p> <pre class="code php"><span class="kw1">require_once</span> DOKU_INC <span class="sy0">.</span> <span class="st_h">'parser/renderer.php'</span><span class="sy0">;</span> &nbsp; <span class="kw2">class</span> Doku_Renderer_SpamCheck <span class="kw2">extends</span> Doku_Renderer <span class="br0">&#123;</span> &nbsp; <span class="co1">// This should be populated by the code executing the instructions</span> <span class="kw2">var</span> <span class="re0">$currentCall</span><span class="sy0">;</span> &nbsp; <span class="co1">// An array of instructions that contain spam</span> <span class="kw2">var</span> <span class="re0">$spamFound</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// pcre pattern for finding spam</span> <span class="kw2">var</span> <span class="re0">$spamPattern</span> <span class="sy0">=</span> <span class="st_h">'#^$#'</span><span class="sy0">;</span> &nbsp; <span class="kw2">function</span> internallink<span class="br0">&#40;</span><span class="re0">$link</span><span class="sy0">,</span> <span class="re0">$title</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> externallink<span class="br0">&#40;</span><span 
class="re0">$link</span><span class="sy0">,</span> <span class="re0">$title</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$link</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> interwikilink<span class="br0">&#40;</span><span class="re0">$link</span><span class="sy0">,</span> <span class="re0">$title</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> filelink<span class="br0">&#40;</span><span class="re0">$link</span><span class="sy0">,</span> <span class="re0">$title</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$link</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> windowssharelink<span class="br0">&#40;</span><span class="re0">$link</span><span class="sy0">,</span> <span 
class="re0">$title</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$link</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> email<span class="br0">&#40;</span><span class="re0">$address</span><span class="sy0">,</span> <span class="re0">$title</span> <span class="sy0">=</span> <span class="kw4">NULL</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$address</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> internalmedialink <span class="br0">&#40;</span><span class="re0">$src</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$src</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> externalmedialink<span class="br0">&#40;</span><span class="re0">$src</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$src</span><span 
class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> __checkTitle<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <a href="http://www.php.net/is_array"><span class="kw3">is_array</span></a><span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#41;</span> <span class="sy0">&amp;&amp;</span> <a href="http://www.php.net/isset"><span class="kw3">isset</span></a><span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#91;</span><span class="st_h">'src'</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$title</span><span class="br0">&#91;</span><span class="st_h">'src'</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// Pattern matching happens here</span> <span class="kw2">function</span> __checkLinkForSpam<span class="br0">&#40;</span><span class="re0">$link</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">if</span><span class="br0">&#40;</span> <a href="http://www.php.net/preg_match"><span class="kw3">preg_match</span></a><span class="br0">&#40;</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">spamPattern</span><span class="sy0">,</span><span class="re0">$link</span><span class="br0">&#41;</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$spam</span> <span class="sy0">=</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">currentCall</span><span 
class="sy0">;</span> <span class="re0">$spam</span><span class="br0">&#91;</span><span class="nu0">3</span><span class="br0">&#93;</span> <span class="sy0">=</span> <span class="re0">$link</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">spamFound</span><span class="br0">&#91;</span><span class="br0">&#93;</span> <span class="sy0">=</span> <span class="re0">$spam</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span> <span class="br0">&#125;</span></pre> <p> Note the line <code>$spam[3] = $link;</code> in the <code>__checkLinkForSpam</code> method. This adds an additional element to the list of spam instructions found, making it easy to determine what the bad URLs were (e.g. for logging). </p> <p> Finally we can use this spam checking renderer like; </p> <pre class="code php"><span class="co1">// Create the parser</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Add the Handler</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load the modes which could contain markup that might be</span> <span class="co1">// mistaken for a link</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'preformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Preformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span 
class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'unformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Unformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'php'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_PHP<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'html'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_HTML<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'code'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Code<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'file'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_File<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'quote'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Quote<span 
class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load the link modes...</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'internallink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_InternalLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'media'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Media<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'externallink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ExternalLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'email'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Email<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'windowssharelink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_WindowsShareLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span 
class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'filelink'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_FileLink<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Loads the raw wiki document</span> <span class="re0">$doc</span> <span class="sy0">=</span> <a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span>DOKU_DATA <span class="sy0">.</span> <span class="st_h">'wiki/spam.txt'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Get a list of instructions</span> <span class="re0">$instructions</span> <span class="sy0">=</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><span class="re0">$doc</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create a renderer</span> <span class="kw1">require_once</span> DOKU_INC <span class="sy0">.</span> <span class="st_h">'parser/spamcheck.php'</span><span class="sy0">;</span> <span class="re0">$Renderer</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Renderer_SpamCheck<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load the spam regex</span> <span class="re0">$Renderer</span><span class="sy0">-&gt;</span><span class="me1">spamPattern</span> <span class="sy0">=</span> getSpamPattern<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Loop through the instructions</span> <span class="kw1">foreach</span> <span class="br0">&#40;</span> <span class="re0">$instructions</span> <span class="kw1">as</span> <span 
class="re0">$instruction</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Store the current instruction</span> <span class="re0">$Renderer</span><span class="sy0">-&gt;</span><span class="me1">currentCall</span> <span class="sy0">=</span> <span class="re0">$instruction</span><span class="sy0">;</span> &nbsp; <a href="http://www.php.net/call_user_func_array"><span class="kw3">call_user_func_array</span></a><span class="br0">&#40;</span><a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="sy0">&amp;</span><span class="re0">$Renderer</span><span class="sy0">,</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">,</span><span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// What spam did we find?</span> <span class="kw1">echo</span> <span class="st_h">'&lt;pre&gt;'</span><span class="sy0">;</span> <a href="http://www.php.net/print_r"><span class="kw3">print_r</span></a><span class="br0">&#40;</span><span class="re0">$Renderer</span><span class="sy0">-&gt;</span><span class="me1">spamFound</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;/pre&gt;'</span><span class="sy0">;</span></pre> <p> Because we don&#039;t need all the syntax modes, checking for spam in this manner should be faster than normal parsing of a document. 
</p> </div> <div class="secedit editbutton_section editbutton_12"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Testing Links for Spam] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="testing_links_for_spam" /><input type="hidden" name="codeblockOffset" value="29" /><input type="hidden" name="range" value="44044-49897" /><button type="submit" title="Testing Links for Spam">編集</button></div></form></div> <h3 class="sectionedit13" id="adding_substitution_syntax">Adding Substitution Syntax</h3> <div class="level3"> <p> <em>Warning:</em> the code below hasn&#039;t been tested - just an example </p> <p> As a simpler task in modifying the parser, this example will add a “bookmark” tag, which can be used to create a named anchor in a document for linking in. </p> <p> The syntax for the tag will be like; </p> <pre class="code">BM{My Bookmark}</pre> <p> The string “My Bookmark” is the name of the bookmark while the rest identifies it as being a bookmark. 
In <abbr title="HyperText Markup Language">HTML</abbr> this would correspond to; </p> <pre class="code html4strict"><span class="sc2">&lt;<a href="http://december.com/html/4/element/a.html"><span class="kw2">a</span></a> <span class="kw3">name</span><span class="sy0">=</span><span class="st0">&quot;My Bookmark&quot;</span>&gt;&lt;<span class="sy0">/</span><a href="http://december.com/html/4/element/a.html"><span class="kw2">a</span></a>&gt;</span></pre> <p> Adding this syntax requires the following steps; </p> <ol> <li class="level1"><div class="li"> Create a parser syntax mode to register with the Lexer</div> </li> <li class="level1"><div class="li"> Update the <code>Doku_Parser_Substition</code> function found at the end of <code>parser.php</code>, which is used to deliver a quick list of modes (used in classes like <code>Doku_Parser_Mode_Table</code>)</div> </li> <li class="level1"><div class="li"> Update the Handler with a method to catch bookmark tokens</div> </li> <li class="level1"><div class="li"> Update the abstract Renderer as documentation and any concrete Renderer implementations that need it.</div> </li> </ol> <p> Creating the parser mode means extending the <code>Doku_Parser_Mode</code> class and overriding its <code>connectTo</code> method; </p> <pre class="code php"><span class="kw2">class</span> Doku_Parser_Mode_Bookmark <span class="kw2">extends</span> Doku_Parser_Mode <span class="br0">&#123;</span> &nbsp; <span class="kw2">function</span> connectTo<span class="br0">&#40;</span><span class="re0">$mode</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="co1">// Allow word and space characters</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addSpecialPattern</span><span class="br0">&#40;</span><span class="st_h">'BM\{[\w ]+\}'</span><span class="sy0">,</span><span class="re0">$mode</span><span class="sy0">,</span><span
class="st_h">'bookmark'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="br0">&#125;</span></pre> <p> This will match the complete bookmark using a single pattern (extracting the bookmark name from the rest of the syntax will be left to the Handler). It uses the Lexer <code>addSpecialPattern</code> method so that the bookmark lives in its own state. </p> <p> <strong>Note</strong> the Lexer does not require the start / end pattern delimiters - it takes care of this for you. </p> <p> Because nothing <em>inside</em> the bookmark should be considered valid wiki markup, there is no reference here to other modes which this mode might accept. </p> <p> Next the <code>Doku_Parser_Substition</code> function in the <code>inc/parser/parser.php</code> file needs updating so that the new mode called <code>bookmark</code> is returned in the list; </p> <pre class="code php"><span class="kw2">function</span> Doku_Parser_Substition<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$modes</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span class="st_h">'acronym'</span><span class="sy0">,</span><span class="st_h">'smiley'</span><span class="sy0">,</span><span class="st_h">'wordblock'</span><span class="sy0">,</span><span class="st_h">'entity'</span><span class="sy0">,</span><span class="st_h">'camelcaselink'</span><span class="sy0">,</span> <span class="st_h">'internallink'</span><span class="sy0">,</span><span class="st_h">'media'</span><span class="sy0">,</span><span class="st_h">'externallink'</span><span class="sy0">,</span><span class="st_h">'linebreak'</span><span class="sy0">,</span><span class="st_h">'email'</span><span class="sy0">,</span> <span class="st_h">'windowssharelink'</span><span class="sy0">,</span><span class="st_h">'filelink'</span><span 
class="sy0">,</span><span class="st_h">'notoc'</span><span class="sy0">,</span><span class="st_h">'multiplyentity'</span><span class="sy0">,</span> <span class="st_h">'quotes'</span><span class="sy0">,</span><span class="st_h">'bookmark'</span><span class="sy0">,</span> &nbsp; <span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">return</span> <span class="re0">$modes</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> <p> This function is just to help registering these modes with other modes that accept them (e.g., lists can contain these modes - you can have a link inside a list) without having to list them in full each time they are needed. </p> <p> <strong>Note:</strong> Similar functions exist, like <code>Doku_Parser_Protected</code> and <code>Doku_Parser_Formatting</code> which return different groups of modes. The grouping of different types of syntax is not entirely perfect but still useful to save lines of code. </p> <p> With the syntax now described, a new method, which matches the name of the mode (i.e. 
<code>bookmark</code>) needs to be added to the Handler; </p> <pre class="code php"><span class="kw2">class</span> Doku_Handler <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="co1">// $match is the string which matched the Lexers regex for bookmarks</span> <span class="co1">// $state identifies the type of match (see the Lexer notes above)</span> <span class="co1">// $pos is the byte index in the raw doc of the first character of the match</span> <span class="kw2">function</span> bookmark<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Technically don’t need to worry about the state;</span> <span class="co1">// should always be DOKU_LEXER_SPECIAL or there's</span> <span class="co1">// a very serious bug</span> <span class="kw1">switch</span> <span class="br0">&#40;</span> <span class="re0">$state</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="kw1">case</span> DOKU_LEXER_SPECIAL<span class="sy0">:</span> &nbsp; <span class="co1">// Attempt to extract the bookmark name</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <a href="http://www.php.net/preg_match"><span class="kw3">preg_match</span></a><span class="br0">&#40;</span><span class="st_h">'/^BM\{(\w{1,})\}$/'</span><span class="sy0">,</span> <span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$nameMatch</span><span class="br0">&#41;</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$name</span> <span class="sy0">=</span> <span class="re0">$nameMatch</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span><span class="sy0">;</span> &nbsp; <span class="co1">// arg0: name of the Renderer method to 
call</span> <span class="co1">// arg1: array of arguments to the Renderer method</span> <span class="co1">// arg2: the byte index as before</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__addCall<span class="br0">&#40;</span><span class="st_h">'bookmark'</span><span class="sy0">,</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="re0">$name</span><span class="br0">&#41;</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// If the bookmark didn't have a valid name, simply pass it</span> <span class="co1">// through unmodified as plain text (cdata)</span> <span class="br0">&#125;</span> <span class="kw1">else</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$this</span><span class="sy0">-&gt;</span>__addCall<span class="br0">&#40;</span><span class="st_h">'cdata'</span><span class="sy0">,</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="re0">$match</span><span class="br0">&#41;</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> <span class="kw1">break</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> &nbsp; <span class="co1">// Must return TRUE or the lexer will halt</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <p> The final step is updating the Renderer (<code>renderer.php</code>) with a new function and implementing it in the XHTML Renderer (<code>xhtml.php</code>); </p> <pre class="code php"><span class="kw2">class</span> Doku_Renderer <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; 
<span class="kw2">function</span> bookmark<span class="br0">&#40;</span><span class="re0">$name</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <pre class="code php"><span class="kw2">class</span> Doku_Renderer_XHTML <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> bookmark<span class="br0">&#40;</span><span class="re0">$name</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$name</span> <span class="sy0">=</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__xmlEntities<span class="br0">&#40;</span><span class="re0">$name</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// id is required in XHTML while name still supported in 1.0</span> <span class="kw1">echo</span> <span class="st_h">'&lt;a class=&quot;bookmark&quot; name=&quot;'</span><span class="sy0">.</span><span class="re0">$name</span><span class="sy0">.</span><span class="st_h">'&quot; id=&quot;'</span><span class="sy0">.</span><span class="re0">$name</span><span class="sy0">.</span><span class="st_h">'&quot;&gt;&lt;/a&gt;'</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <p> See the <code>tests/parser_replacements.test.php</code> script for examples of how you might test this code. 
</p> </div> <div class="secedit editbutton_section editbutton_13"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Adding Substitution Syntax] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="adding_substitution_syntax" /><input type="hidden" name="codeblockOffset" value="32" /><input type="hidden" name="range" value="49898-55016" /><button type="submit" title="Adding Substitution Syntax">編集</button></div></form></div> <h3 class="sectionedit14" id="adding_formatting_syntax_with_state">Adding Formatting Syntax (with state)</h3> <div class="level3"> <p> <em>Warning:</em> the code below hasn&#039;t been tested - just an example </p> <p> To show more advanced use of the Lexer, this example will add markup that allows users to change the enclosed text color to red, yellow or green. </p> <p> The markup would look like; </p> <pre class="code">&lt;red&gt;This is red&lt;/red&gt;. This is black. &lt;yellow&gt;This is yellow&lt;/yellow&gt;. This is also black. 
&lt;green&gt;This is green&lt;/green&gt;.</pre> <p> The steps required to implement this are essentially the same as the previous example, starting with the new syntax mode, but add some additional detail as other modes are involved; </p> <pre class="code php"><span class="kw2">class</span> Doku_Parser_Mode_TextColors <span class="kw2">extends</span> Doku_Parser_Mode <span class="br0">&#123;</span> &nbsp; <span class="kw2">var</span> <span class="re0">$color</span><span class="sy0">;</span> &nbsp; <span class="kw2">var</span> <span class="re0">$colors</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="st_h">'red'</span><span class="sy0">,</span><span class="st_h">'green'</span><span class="sy0">,</span><span class="st_h">'blue'</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw2">function</span> Doku_Parser_Mode_TextColor<span class="br0">&#40;</span><span class="re0">$color</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Just to help prevent mistakes using this mode</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="sy0">!</span><a href="http://www.php.net/array_key_exists"><span class="kw3">array_key_exists</span></a><span class="br0">&#40;</span><span class="re0">$color</span><span class="sy0">,</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">colors</span><span class="br0">&#41;</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> <a href="http://www.php.net/trigger_error"><span class="kw3">trigger_error</span></a><span class="br0">&#40;</span><span class="st_h">'Invalid color '</span><span class="sy0">.</span><span class="re0">$color</span><span class="sy0">,</span> <span class="kw4">E_USER_WARNING</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span
class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">color</span> <span class="sy0">=</span> <span class="re0">$color</span><span class="sy0">;</span> &nbsp; <span class="co1">// This mode accepts other modes;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">allowedModes</span> <span class="sy0">=</span> <a href="http://www.php.net/array_merge"><span class="kw3">array_merge</span></a> <span class="br0">&#40;</span> Doku_Parser_Formatting<span class="br0">&#40;</span><span class="re0">$color</span><span class="br0">&#41;</span><span class="sy0">,</span> Doku_Parser_Substition<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">,</span> Doku_Parser_Disabled<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> &nbsp; <span class="co1">// connectTo is called once for every mode registered with the Lexer</span> <span class="kw2">function</span> connectTo<span class="br0">&#40;</span><span class="re0">$mode</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// The lookahead pattern makes sure there's a closing tag...</span> <span class="re0">$pattern</span> <span class="sy0">=</span> <span class="st_h">'&lt;'</span><span class="sy0">.</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">color</span><span class="sy0">.</span><span class="st_h">'&gt;(?=.*&lt;/'</span><span class="sy0">.</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">color</span><span class="sy0">.</span><span class="st_h">'&gt;)'</span><span class="sy0">;</span> &nbsp; <span class="co1">// arg0: pattern to match to enter this mode</span> <span class="co1">// arg1: other modes where this pattern may match</span> <span class="co1">// arg2: name of the this mode</span> <span class="re0">$this</span><span 
class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="re0">$pattern</span><span class="sy0">,</span><span class="re0">$mode</span><span class="sy0">,</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">color</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// post connect is only called once</span> <span class="kw2">function</span> postConnect<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// arg0: pattern to match to exit this mode</span> <span class="co1">// arg1: name of mode to exit</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;/'</span><span class="sy0">.</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">color</span><span class="sy0">.</span><span class="st_h">'&gt;'</span><span class="sy0">,</span><span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">color</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> &nbsp; <span class="br0">&#125;</span></pre> <p> Some points on the above class. </p> <ol> <li class="level1"><div class="li"> It actually represents multiple modes, one for each color. The colors need separating into different modes so that <code>&lt;/green&gt;</code> doesn&#039;t end up being the closing tag for <code>&lt;red&gt;</code>, for example.</div> </li> <li class="level1"><div class="li"> These modes can contain other modes, for example <code>&lt;red&gt;**Warning**&lt;/red&gt;</code> for bold text which is red. 
This is registered in the constructor for this class by assigning the accepted mode names to the <code>allowedModes</code> property.</div> </li> <li class="level1"><div class="li"> When registering the entry pattern, it&#039;s a good idea to check the exit pattern exists (which is done with the lookahead). This should help protect users from themselves, when they forget to add the closing tag.</div> </li> <li class="level1"><div class="li"> The entry pattern needs to be registered for each mode within which the color tags could be used. By contrast we only need one exit pattern, so this is placed in the <code>postConnect</code> method, so that it is only executed once, after all calls to <code>connectTo</code> on all modes have been made.</div> </li> </ol> <p> With the parsing mode class done, the new modes now need adding to the <code>Doku_Parser_Formatting</code> function; </p> <pre class="code php"><span class="kw2">function</span> Doku_Parser_Formatting<span class="br0">&#40;</span><span class="re0">$remove</span> <span class="sy0">=</span> <span class="st_h">''</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$modes</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span class="st_h">'strong'</span><span class="sy0">,</span> <span class="st_h">'emphasis'</span><span class="sy0">,</span> <span class="st_h">'underline'</span><span class="sy0">,</span> <span class="st_h">'monospace'</span><span class="sy0">,</span> <span class="st_h">'subscript'</span><span class="sy0">,</span> <span class="st_h">'superscript'</span><span class="sy0">,</span> <span class="st_h">'deleted'</span><span class="sy0">,</span> <span class="st_h">'red'</span><span class="sy0">,</span><span class="st_h">'yellow'</span><span class="sy0">,</span><span class="st_h">'green'</span><span class="sy0">,</span> <span class="br0">&#41;</span><span class="sy0">;</span> <span
class="re0">$key</span> <span class="sy0">=</span> <a href="http://www.php.net/array_search"><span class="kw3">array_search</span></a><span class="br0">&#40;</span><span class="re0">$remove</span><span class="sy0">,</span> <span class="re0">$modes</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <a href="http://www.php.net/is_int"><span class="kw3">is_int</span></a><span class="br0">&#40;</span><span class="re0">$key</span><span class="br0">&#41;</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> <a href="http://www.php.net/unset"><span class="kw3">unset</span></a><span class="br0">&#40;</span><span class="re0">$modes</span><span class="br0">&#91;</span><span class="re0">$key</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw1">return</span> <span class="re0">$modes</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> <p> <strong>Note</strong> this function is primed to unset one of the modes to prevent a formatting mode being nested inside itself (e.g. we don&#039;t want <code>&lt;red&gt;A &lt;red&gt;warning&lt;/red&gt; message&lt;/red&gt;</code> to happen). </p> <p> Next the Handler needs updating with one method for each color; </p> <pre class="code php"><span class="kw2">class</span> Doku_Handler <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> red<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="co1">// The nestingTag method in the Handler is there</span> <span class="co1">// to save having to repeat the same code many</span> <span class="co1">// times. 
It will create an opening and closing</span> <span class="co1">// instruction for the entry and exit patterns,</span> <span class="co1">// while passing through the rest as cdata</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__nestingTag<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="sy0">,</span> <span class="st_h">'red'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> yellow<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__nestingTag<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="sy0">,</span> <span class="st_h">'yellow'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> green<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span>__nestingTag<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="sy0">,</span> 
<span class="st_h">'green'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <p> Finally we can update the Renderers; </p> <pre class="code php"><span class="kw2">class</span> Doku_Renderer <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> red_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> <span class="kw2">function</span> red_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> yellow_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> <span class="kw2">function</span> yellow_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> green_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> <span class="kw2">function</span> green_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <pre class="code php"><span class="kw2">class</span> Doku_Renderer_XHTML <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> red_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;span class=&quot;red&quot;&gt;'</span><span class="sy0">;</span> 
<span class="br0">&#125;</span> <span class="kw2">function</span> red_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;/span&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> yellow_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;span class=&quot;yellow&quot;&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="kw2">function</span> yellow_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;/span&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> green_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;span class=&quot;green&quot;&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="kw2">function</span> green_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;/span&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <p> See the <code>tests/parser_formatting.test.php</code> script for examples of how you might write unit tests for this code. 
</p> </div> <div class="secedit editbutton_section editbutton_14"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Adding Formatting Syntax (with state)] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="adding_formatting_syntax_with_state" /><input type="hidden" name="codeblockOffset" value="39" /><input type="hidden" name="range" value="55017-60429" /><button type="submit" title="Adding Formatting Syntax (with state)">編集</button></div></form></div> <h3 class="sectionedit15" id="adding_block-level_syntax">Adding Block-Level Syntax</h3> <div class="level3"> <p> <em>Warning:</em> the code below hasn&#039;t been tested - just an example </p> <p> Extending the previous example, this one will create a new tag for marking up messages in the document as things still to be done. Example use might look like; </p> <pre class="code">===== Wiki Quotation Syntax ===== This syntax allows &lt;todo&gt; Describe quotation syntax &#039;&gt;&#039; &lt;/todo&gt; Some more text</pre> <p> This syntax might allow a tool to be added to search wiki pages and find things that still need something doing, as well as making it stand out in the document with some eye-catching style. </p> <p> What&#039;s different about this syntax is it should be displayed in a separate block in the document (e.g. inside <code>&lt;div/&gt;</code> so that it can be floated with <abbr title="Cascading Style Sheets">CSS</abbr>). This requires modifying the <code>Doku_Handler_Block</code> class, which loops through all the instructions after all tokens have been seen by the handler and takes care of adding <code>&lt;p/&gt;</code> tags. 
</p> <p> The parser mode for this syntax might be; </p> <pre class="code php"><span class="kw2">class</span> Doku_Parser_Mode_Todo <span class="kw2">extends</span> Doku_Parser_Mode <span class="br0">&#123;</span> &nbsp; <span class="kw2">function</span> Doku_Parser_Mode_Todo<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">allowedModes</span> <span class="sy0">=</span> <a href="http://www.php.net/array_merge"><span class="kw3">array_merge</span></a> <span class="br0">&#40;</span> Doku_Parser_Formatting<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">,</span> Doku_Parser_Substition<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">,</span> Doku_Parser_Disabled<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> connectTo<span class="br0">&#40;</span><span class="re0">$mode</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="re0">$pattern</span> <span class="sy0">=</span> <span class="st_h">'&lt;todo&gt;(?=.*&lt;/todo&gt;)'</span><span class="sy0">;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addEntryPattern</span><span class="br0">&#40;</span><span class="re0">$pattern</span><span class="sy0">,</span><span class="re0">$mode</span><span class="sy0">,</span><span class="st_h">'todo'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw2">function</span> postConnect<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$this</span><span class="sy0">-&gt;</span><span 
class="me1">Lexer</span><span class="sy0">-&gt;</span><span class="me1">addExitPattern</span><span class="br0">&#40;</span><span class="st_h">'&lt;/todo&gt;'</span><span class="sy0">,</span><span class="st_h">'todo'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="br0">&#125;</span></pre> <p> This mode is then added to the <code>Doku_Parser_BlockContainers</code> function in <code>parser.php</code>; </p> <pre class="code php"><span class="kw2">function</span> Doku_Parser_BlockContainers<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="re0">$modes</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span class="st_h">'footnote'</span><span class="sy0">,</span> <span class="st_h">'listblock'</span><span class="sy0">,</span> <span class="st_h">'table'</span><span class="sy0">,</span><span class="st_h">'quote'</span><span class="sy0">,</span> <span class="co1">// hr breaks the principle but HRs should not be used in tables / lists </span> <span class="co1">// so put it here</span> <span class="st_h">'hr'</span><span class="sy0">,</span> <span class="st_h">'todo'</span><span class="sy0">,</span> <span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">return</span> <span class="re0">$modes</span><span class="sy0">;</span> <span class="br0">&#125;</span></pre> <p> Updating the <code>Doku_Handler</code> class simply requires; </p> <pre class="code php"><span class="kw2">class</span> Doku_Handler <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> todo<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span 
class="re0">$this</span><span class="sy0">-&gt;</span>__nestingTag<span class="br0">&#40;</span><span class="re0">$match</span><span class="sy0">,</span> <span class="re0">$state</span><span class="sy0">,</span> <span class="re0">$pos</span><span class="sy0">,</span> <span class="st_h">'todo'</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="kw1">return</span> <span class="kw4">TRUE</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <p> But the <code>Doku_Handler_Block</code> class (found in <code>inc/parser/handler.php</code>) also needs updating, to register the todo opening and closing instructions; </p> <pre class="code php"><span class="kw2">class</span> Doku_Handler_Block <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="co1">// Blocks don't contain linefeeds</span> <span class="kw2">var</span> <span class="re0">$blockOpen</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span class="st_h">'header'</span><span class="sy0">,</span> <span class="st_h">'listu_open'</span><span class="sy0">,</span><span class="st_h">'listo_open'</span><span class="sy0">,</span><span class="st_h">'listitem_open'</span><span class="sy0">,</span> <span class="st_h">'table_open'</span><span class="sy0">,</span><span class="st_h">'tablerow_open'</span><span class="sy0">,</span><span class="st_h">'tablecell_open'</span><span class="sy0">,</span><span class="st_h">'tableheader_open'</span><span class="sy0">,</span> <span class="st_h">'quote_open'</span><span class="sy0">,</span> <span class="st_h">'section_open'</span><span class="sy0">,</span> <span class="co1">// Needed to prevent p_open between header and section_open</span> <span class="st_h">'code'</span><span class="sy0">,</span><span class="st_h">'file'</span><span 
class="sy0">,</span><span class="st_h">'php'</span><span class="sy0">,</span><span class="st_h">'html'</span><span class="sy0">,</span><span class="st_h">'hr'</span><span class="sy0">,</span><span class="st_h">'preformatted'</span><span class="sy0">,</span> <span class="st_h">'todo_open'</span><span class="sy0">,</span> <span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw2">var</span> <span class="re0">$blockClose</span> <span class="sy0">=</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span> <span class="st_h">'header'</span><span class="sy0">,</span> <span class="st_h">'listu_close'</span><span class="sy0">,</span><span class="st_h">'listo_close'</span><span class="sy0">,</span><span class="st_h">'listitem_close'</span><span class="sy0">,</span> <span class="st_h">'table_close'</span><span class="sy0">,</span><span class="st_h">'tablerow_close'</span><span class="sy0">,</span><span class="st_h">'tablecell_close'</span><span class="sy0">,</span><span class="st_h">'tableheader_close'</span><span class="sy0">,</span> <span class="st_h">'quote_close'</span><span class="sy0">,</span> <span class="st_h">'section_close'</span><span class="sy0">,</span> <span class="co1">// Needed to prevent p_close after section_close</span> <span class="st_h">'code'</span><span class="sy0">,</span><span class="st_h">'file'</span><span class="sy0">,</span><span class="st_h">'php'</span><span class="sy0">,</span><span class="st_h">'html'</span><span class="sy0">,</span><span class="st_h">'hr'</span><span class="sy0">,</span><span class="st_h">'preformatted'</span><span class="sy0">,</span> <span class="st_h">'todo_close'</span><span class="sy0">,</span> <span class="br0">&#41;</span><span class="sy0">;</span> &nbsp;</pre> <p> By registering the <code>todo_open</code> and <code>todo_close</code> in the <code>$blockOpen</code> and <code>$blockClose</code> arrays, it instructs the 
<code>Doku_Handler_Block</code> class that any previous open paragraphs should be closed <em>before</em> entering the todo section then a new paragraph should start <em>after</em> the todo section. Inside the todo, no additional paragraphs should be inserted. </p> <p> With that done, the Renderers can be updated; </p> <pre class="code php"><span class="kw2">class</span> Doku_Renderer <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> todo_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> <span class="kw2">function</span> todo_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span><span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> <pre class="code php"><span class="kw2">class</span> Doku_Renderer_XHTML <span class="br0">&#123;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="kw2">function</span> todo_open<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;div class=&quot;todo&quot;&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> <span class="kw2">function</span> todo_close<span class="br0">&#40;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <span class="kw1">echo</span> <span class="st_h">'&lt;/div&gt;'</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="co1">// ...</span> &nbsp; <span class="br0">&#125;</span></pre> </div> <div class="secedit editbutton_section editbutton_15"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Adding Block-Level Syntax] " 
/><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="adding_block-level_syntax" /><input type="hidden" name="codeblockOffset" value="45" /><input type="hidden" name="range" value="60430-64437" /><button type="submit" title="Adding Block-Level Syntax">編集</button></div></form></div> <h3 class="sectionedit16" id="serializing_the_renderer_instructions">Serializing the Renderer Instructions</h3> <div class="level3"> <p> It is possible to serialize the list of instructions output from the Handler, to eliminate the overhead of re-parsing the original document on each request, if the document itself hasn&#039;t changed. </p> <p> A simple implementation of this might be; </p> <pre class="code php"><span class="re0">$ID</span> <span class="sy0">=</span> DOKU_DATA <span class="sy0">.</span> <span class="st_h">'wiki/syntax.txt'</span><span class="sy0">;</span> <span class="re0">$cacheID</span> <span class="sy0">=</span> DOKU_CACHE <span class="sy0">.</span> <span class="re0">$ID</span><span class="sy0">.</span><span class="st_h">'.cache'</span><span class="sy0">;</span> &nbsp; <span class="co1">// If there's no cache file or it's out of date</span> <span class="co1">// (the original modified), get a fresh list of instructions</span> <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="sy0">!</span><a href="http://www.php.net/file_exists"><span class="kw3">file_exists</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="br0">&#41;</span> <span class="sy0">||</span> <span class="br0">&#40;</span><a href="http://www.php.net/filemtime"><span class="kw3">filemtime</span></a><span class="br0">&#40;</span><span class="re0">$ID</span><span class="br0">&#41;</span> <span class="sy0">&gt;</span> <a href="http://www.php.net/filemtime"><span class="kw3">filemtime</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="br0">&#41;</span><span class="br0">&#41;</span> 
<span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="kw1">require_once</span> DOKU_INC <span class="sy0">.</span> <span class="st_h">'parser/parser.php'</span><span class="sy0">;</span> &nbsp; <span class="co1">// Create the parser</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Add the Handler</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load all the modes</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'listblock'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ListBlock<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'preformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Preformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'notoc'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_NoToc<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span 
class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'header'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Header<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'table'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Table<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// etc. etc.</span> &nbsp; <span class="re0">$instructions</span> <span class="sy0">=</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span><span class="re0">$filename</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Serialize and cache </span> <span class="re0">$sInstructions</span> <span class="sy0">=</span> <a href="http://www.php.net/serialize"><span class="kw3">serialize</span></a><span class="br0">&#40;</span><span class="re0">$instructions</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span><span class="re0">$fh</span> <span class="sy0">=</span> <span class="sy0">@</span><a href="http://www.php.net/fopen"><span class="kw3">fopen</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="sy0">,</span> <span class="st_h">'a'</span><span class="br0">&#41;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span><a 
href="http://www.php.net/fwrite"><span class="kw3">fwrite</span></a><span class="br0">&#40;</span><span class="re0">$fh</span><span class="sy0">,</span> <span class="re0">$sInstructions</span><span class="br0">&#41;</span> <span class="sy0">===</span> <span class="kw4">FALSE</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <a href="http://www.php.net/die"><span class="kw3">die</span></a><span class="br0">&#40;</span><span class="st0">&quot;Cannot write to file (<span class="es4">$cacheID</span>)&quot;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <a href="http://www.php.net/fclose"><span class="kw3">fclose</span></a><span class="br0">&#40;</span><span class="re0">$fh</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="br0">&#125;</span> <span class="kw1">else</span> <span class="br0">&#123;</span> <span class="co1">// Load the serialized instructions and unserialize</span> <span class="re0">$sInstructions</span> <span class="sy0">=</span> <a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$instructions</span> <span class="sy0">=</span> <a href="http://www.php.net/unserialize"><span class="kw3">unserialize</span></a><span class="br0">&#40;</span><span class="re0">$sInstructions</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="re0">$Renderer</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Renderer_XHTML<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="kw1">foreach</span> <span class="br0">&#40;</span> <span class="re0">$instructions</span> <span class="kw1">as</span> <span 
class="re0">$instruction</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> <a href="http://www.php.net/call_user_func_array"><span class="kw3">call_user_func_array</span></a><span class="br0">&#40;</span> <a href="http://www.php.net/array"><span class="kw3">array</span></a><span class="br0">&#40;</span><span class="sy0">&amp;</span><span class="re0">$Renderer</span><span class="sy0">,</span> <span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">0</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">,</span><span class="re0">$instruction</span><span class="br0">&#91;</span><span class="nu0">1</span><span class="br0">&#93;</span> <span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="kw1">echo</span> <span class="re0">$Renderer</span><span class="sy0">-&gt;</span><span class="me1">doc</span><span class="sy0">;</span></pre> <p> <strong>Note</strong> this implementation is not complete. What happens if someone modifies one of the <code>smiley.conf</code> files to add a new smiley, for example? The change will need to trigger updating the cache, so that the new smiley is parsed. Some care over file locking (or the renaming trick) may also be required. 
</p> </div> <div class="secedit editbutton_section editbutton_16"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Serializing the Renderer Instructions] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="serializing_the_renderer_instructions" /><input type="hidden" name="codeblockOffset" value="52" /><input type="hidden" name="range" value="64438-66627" /><button type="submit" title="Serializing the Renderer Instructions">編集</button></div></form></div> <h3 class="sectionedit17" id="serializing_the_parser">Serializing the Parser</h3> <div class="level3"> <p> Similar to the above example, it is also possible to serialize the Parser itself, before parsing begins. Because setting up the modes carries a fairly high overhead, this can add a small increase in performance. From loose benchmarking, parsing the wiki:syntax page on a single (slow!) system, was taking around 1.5 seconds to finish <em>without</em> serializing the Parser and about 1.25 seconds <em>with</em> a serialized version of the Parser. 
</p> <p> In brief it can be implemented something like; </p> <pre class="code php"><span class="re0">$cacheID</span> <span class="sy0">=</span> DOKU_CACHE <span class="sy0">.</span> <span class="st_h">'parser.cache'</span><span class="sy0">;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span> <span class="sy0">!</span><a href="http://www.php.net/file_exists"><span class="kw3">file_exists</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="br0">&#41;</span> <span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="co1">// Create the parser...</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Parser<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">Handler</span> <span class="sy0">=</span> <span class="sy0">&amp;</span> <span class="kw2">new</span> Doku_Handler<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Load all the modes</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'listblock'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_ListBlock<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">addMode</span><span class="br0">&#40;</span><span class="st_h">'preformatted'</span><span class="sy0">,</span><span class="kw2">new</span> Doku_Parser_Mode_Preformatted<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="co2"># etc. 
</span> <span class="co1">// IMPORTANT: call connectModes()</span> <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">connectModes</span><span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Serialize</span> <span class="re0">$sParser</span> <span class="sy0">=</span> <a href="http://www.php.net/serialize"><span class="kw3">serialize</span></a><span class="br0">&#40;</span><span class="re0">$Parser</span><span class="br0">&#41;</span><span class="sy0">;</span> &nbsp; <span class="co1">// Write to file</span> <span class="kw1">if</span> <span class="br0">&#40;</span><span class="re0">$fh</span> <span class="sy0">=</span> <span class="sy0">@</span><a href="http://www.php.net/fopen"><span class="kw3">fopen</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="sy0">,</span> <span class="st_h">'a'</span><span class="br0">&#41;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> &nbsp; <span class="kw1">if</span> <span class="br0">&#40;</span><a href="http://www.php.net/fwrite"><span class="kw3">fwrite</span></a><span class="br0">&#40;</span><span class="re0">$fh</span><span class="sy0">,</span> <span class="re0">$sParser</span><span class="br0">&#41;</span> <span class="sy0">===</span> <span class="kw4">FALSE</span><span class="br0">&#41;</span> <span class="br0">&#123;</span> <a href="http://www.php.net/die"><span class="kw3">die</span></a><span class="br0">&#40;</span><span class="st0">&quot;Cannot write to file (<span class="es4">$cacheID</span>)&quot;</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <a href="http://www.php.net/fclose"><span class="kw3">fclose</span></a><span class="br0">&#40;</span><span class="re0">$fh</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="br0">&#125;</span> <span 
class="kw1">else</span> <span class="br0">&#123;</span> <span class="co1">// Otherwise load the serialized version</span> <span class="re0">$sParser</span> <span class="sy0">=</span> <a href="http://www.php.net/file_get_contents"><span class="kw3">file_get_contents</span></a><span class="br0">&#40;</span><span class="re0">$cacheID</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="re0">$Parser</span> <span class="sy0">=</span> <a href="http://www.php.net/unserialize"><span class="kw3">unserialize</span></a><span class="br0">&#40;</span><span class="re0">$sParser</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="br0">&#125;</span> &nbsp; <span class="re0">$Parser</span><span class="sy0">-&gt;</span><span class="me1">parse</span><span class="br0">&#40;</span><span class="re0">$doc</span><span class="br0">&#41;</span><span class="sy0">;</span></pre> <p> Some implementation notes which aren&#039;t covered above; </p> <ul> <li class="level1"><div class="li"> Should use some file locking when writing to the cache (or else create with different name then rename) otherwise a request may receive a partially complete cache file, if read while writing still in progress</div> </li> <li class="level1"><div class="li"> What to do if one of the <code>*.conf</code> files is updated? Need to flush the cache.</div> </li> <li class="level1"><div class="li"> May be different versions of the Parser (e.g. 
for spam checking) so use different cache IDs</div> </li> </ul> </div> <div class="secedit editbutton_section editbutton_17"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Serializing the Parser] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="serializing_the_parser" /><input type="hidden" name="codeblockOffset" value="53" /><input type="hidden" name="range" value="66628-68495" /><button type="submit" title="Serializing the Parser">編集</button></div></form></div> <h2 class="sectionedit18" id="testing">Testing</h2> <div class="level2"> <p> The <a href="https://en.wikipedia.org/wiki/Unit_testing" class="interwiki iw_wp" title="https://en.wikipedia.org/wiki/Unit_testing">unit tests</a> provided use <a href="http://www.lastcraft.com/simple_test.php" class="urlextern" title="http://www.lastcraft.com/simple_test.php">http://www.lastcraft.com/simple_test.php</a>. SimpleTest is an excellent tool for <a href="https://en.wikipedia.org/wiki/Unit_testing" class="interwiki iw_wp" title="https://en.wikipedia.org/wiki/Unit_testing">unit testing</a> PHP code. In particular, the documentation shines (see <a href="http://simpletest.sourceforge.net" class="urlextern" title="http://simpletest.sourceforge.net">http://simpletest.sourceforge.net</a> as well as that found at <a href="http://www.lastcraft.com/simple_test.php" class="urlextern" title="http://www.lastcraft.com/simple_test.php">http://www.lastcraft.com/simple_test.php</a>) and the code is very mature, taking care of many issues transparently (like catching PHP errors and reporting them in the test results). </p> <p> For the DokuWiki parser, tests have been provided for all the syntax implemented and I <em>strongly</em> recommend writing new tests if additional syntax is added. 
</p> <p> To get the tests running, you should only need to modify the file <code>tests/testconfig.php</code>, to point at the correct SimpleTest and DokuWiki directories. </p> <p> Some notes / recommendations; </p> <ol> <li class="level1"><div class="li"> Re-run the tests every time you change something in the parser - problems will often surface immediately saving lots of time.</div> </li> <li class="level1"><div class="li"> They only test specific cases. They don&#039;t guarantee there&#039;s no bugs only that those specific cases are working properly.</div> </li> <li class="level1"><div class="li"> If bugs are found, write a test for that bug while fixing it (better yet, <em>before</em> fixing it), to prevent it recurring.</div> </li> </ol> </div> <div class="secedit editbutton_section editbutton_18"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Testing] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="testing" /><input type="hidden" name="codeblockOffset" value="54" /><input type="hidden" name="range" value="68496-69700" /><button type="submit" title="Testing">編集</button></div></form></div> <h2 class="sectionedit19" id="bugs_issues">Bugs / Issues</h2> <div class="level2"> <p> Some things off the top of my head. 
</p> </div> <div class="secedit editbutton_section editbutton_19"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Bugs / Issues] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="bugs_issues" /><input type="hidden" name="codeblockOffset" value="54" /><input type="hidden" name="range" value="69701-69764" /><button type="submit" title="Bugs / Issues">編集</button></div></form></div> <h3 class="sectionedit20" id="order_of_adding_modes_important">Order of adding modes important</h3> <div class="level3"> <p> Haven&#039;t entirely nailed down the “rules” on this one but the order in which modes are added is important (and the Parser doesn&#039;t check this for you). In particular, the <code>eol</code> mode should be loaded last, as it eats linefeed characters that may prevent other modes like lists and tables from working properly. </p> <p> In general recommend loading the modes in the order used in the first example here. </p> <blockquote><div class="no"> From what I have worked out, order is only important if two or more modes have patterns which can be matched by the same set of characters - in which case the mode with the lowest sort number will win out. 
A syntax plugin can make use of this to replace a native DokuWiki handler, for an example see <a href="/plugin:code" class="wikilink1" title="plugin:code" data-wiki-id="plugin:code">code plugin</a> — <em> <a href="mailto:chris%20%5Bat%5D%20jalakai%20%5Bdot%5D%20co%20%5Bdot%5D%20uk" class="mail" title="chris [at] jalakai [dot] co [dot] uk">ChrisS</a> 2005-07-30 </em></div></blockquote> </div> <div class="secedit editbutton_section editbutton_20"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Order of adding modes important] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="order_of_adding_modes_important" /><input type="hidden" name="codeblockOffset" value="54" /><input type="hidden" name="range" value="69765-70587" /><button type="submit" title="Order of adding modes important">編集</button></div></form></div> <h3 class="sectionedit21" id="change_to_wordblock">Change to Wordblock</h3> <div class="level3"> <p> Originally the wordblock functionality was for matching link URLs against a blacklist. This has been changed. The “wordblock” mode is used for matching things like rude words, fuck it. To prevent spam URLs, it is probably best to use the example above. </p> <p> One recommendation here - the <code>conf/wordblock.conf</code> file should be renamed <code>conf/spam.conf</code>, containing the <abbr title="Uniform Resource Locator">URL</abbr> blacklist. A new file <code>conf/badwords.conf</code> contains a list of rude words to censor. 
</p> </div> <div class="secedit editbutton_section editbutton_21"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Change to Wordblock] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="change_to_wordblock" /><input type="hidden" name="codeblockOffset" value="54" /><input type="hidden" name="range" value="70588-71067" /><button type="submit" title="Change to Wordblock">編集</button></div></form></div> <h3 class="sectionedit22" id="weakest_links">Weakest Links</h3> <div class="level3"> <p> From the point of view of design, the worst parts of the code are in <code>inc/parser/handler.php</code>, namely the “re-writing” classes; </p> <ul> <li class="level1"><div class="li"> <code>Doku_Handler_List</code> (inline re-writer)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Preformatted</code> (inline re-writer)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Quote</code> (inline re-writer)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Table</code> (inline re-writer)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Section</code> (post processing re-writer)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Block</code> (post processing re-writer)</div> </li> <li class="level1"><div class="li"> <code>Doku_Handler_Toc</code> (post processing re-writer)</div> </li> </ul> <p> The “inline re-writers” are used while the Handler is still receiving tokens from the Lexer while the “post processing re-writers” are invoked from <code>Doku_Handler::__finalize()</code> and loop once through the complete list of instructions the Handler has created (which has a performance overhead). 
</p> <p> It <em>may</em> be possible to eliminate <code>Doku_Handler_List</code>, <code>Doku_Handler_Quote</code> and <code>Doku_Handler_Table</code> by using multiple lexing modes (each of these currently uses only a single mode). </p> <p> Also it <em>may</em> be possible to change <code>Doku_Handler_Section</code> and <code>Doku_Handler_Toc</code> to being “inline re-writers”, triggered by header tokens received by the Handler. </p> <p> The most painful is the <code>Doku_Handler_Block</code> class, responsible for inserting paragraphs into the instructions. There may be a value in inserting further abstractions to make it easier to maintain but, in general, can&#039;t see a way to eliminate it completely and there&#039;s probably some bugs there which have yet to be found. </p> </div> <div class="secedit editbutton_section editbutton_22"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Weakest Links] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="weakest_links" /><input type="hidden" name="codeblockOffset" value="54" /><input type="hidden" name="range" value="71068-72567" /><button type="submit" title="Weakest Links">編集</button></div></form></div> <h3 class="sectionedit23" id="greedy_tags">Greedy Tags</h3> <div class="level3"> <p> Consider the following wiki syntax; </p> <pre class="code">Hello &lt;sup&gt;World ---- &lt;sup&gt;Goodbye&lt;/sup&gt; World</pre> <p> The user forgot to close the first &lt;sup&gt; tag. </p> <p> The result is; </p> <p> Hello <sup>World —- &lt;sup&gt;Goodbye</sup> World </p> <p> The first &lt;sup&gt; tag is being too greedy in checking for its entry pattern. </p> <p> This applies to all similar modes. 
The entry patterns currently check that the closing tag exists somewhere but should also check that a second opening tag of the same sort was not found first. </p> </div> <div class="secedit editbutton_section editbutton_23"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Greedy Tags] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="greedy_tags" /><input type="hidden" name="codeblockOffset" value="54" /><input type="hidden" name="range" value="72568-73090" /><button type="submit" title="Greedy Tags">編集</button></div></form></div> <h3 class="sectionedit24" id="footnote_across_list">Footnote across list</h3> <div class="level3"> <p> There&#039;s one failing test in the test suite to document this problem. In essence, if a footnote is closed across multiple list items, it can have the effect of producing an opening footnote instruction without the corresponding closing instruction. The following is an example of syntax that would cause this problem; </p> <pre class="code"> *((A)) *(( B * C )) </pre> <p> For the time being users will have to fix pages where this has been done. The solution is to split list tokenization into multiple modes (currently there is only a single mode <code>listblock</code> for lists). 
</p> </div> <div class="secedit editbutton_section editbutton_24"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Footnote across list] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="footnote_across_list" /><input type="hidden" name="codeblockOffset" value="55" /><input type="hidden" name="range" value="73091-73688" /><button type="submit" title="Footnote across list">編集</button></div></form></div> <h3 class="sectionedit25" id="linefeed_grabbing">Linefeed grabbing</h3> <div class="level3"> <p> <a href="http://bugs.dokuwiki.org/index.php?do=details&amp;task_id=261" class="interwiki iw_bug" title="http://bugs.dokuwiki.org/index.php?do=details&amp;task_id=261">261</a> </p> <p> Because the header, horizontal rule, list, table, quote and preformatted (indented text) syntax relies on linefeed characters to mark their starts and ends, they require regexes which consume linefeed characters. This means users need to add an additional linefeed if a table appears immediately after a list, for example. 
</p> <p> Given the following wiki syntax; </p> <pre class="code">Before the list - List Item - List Item | Cell A | Cell B | | Cell C | Cell D | After the table</pre> <p> It produces; </p> <hr /> <p> Before the list </p> <ol> <li class="level1"><div class="li"> List Item</div> </li> <li class="level1"><div class="li"> List Item</div> </li> </ol> <p> | Cell A | Cell B | </p> <div class="table sectionedit26"><table class="inline"> <tr class="row0"> <td class="col0"> Cell C </td><td class="col1"> Cell D </td> </tr> </table></div> <div class="secedit editbutton_table editbutton_26"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[テーブル] " /><input type="hidden" name="target" value="table" /><input type="hidden" name="hid" value="table" /><input type="hidden" name="range" value="74288-74307" /><button type="submit" title="テーブル">編集</button></div></form></div> <p> After the table </p> <hr /> <p> Notice that the <strong>first row</strong> of the table is treated as plain text. 
</p> <p> To correct this the wiki syntax must have an additional linefeed between the list and the table (which could also contain text); </p> <pre class="code">Before the list - List Item - List Item | Cell A | Cell B | | Cell C | Cell D | After the table</pre> <p> Which looks like; </p> <hr /> <p> Before the list </p> <ol> <li class="level1"><div class="li"> List Item</div> </li> <li class="level1"><div class="li"> List Item</div> </li> </ol> <div class="table sectionedit27"><table class="inline"> <tr class="row0"> <td class="col0"> Cell A </td><td class="col1"> Cell B </td> </tr> <tr class="row1"> <td class="col0"> Cell C </td><td class="col1"> Cell D </td> </tr> </table></div> <div class="secedit editbutton_table editbutton_27"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[テーブル] " /><input type="hidden" name="target" value="table" /><input type="hidden" name="hid" value="table1" /><input type="hidden" name="range" value="74716-74755" /><button type="submit" title="テーブル">編集</button></div></form></div> <p> After the table </p> <hr /> <p> Without scanning the text multiple times (some kind of “pre-parse” operation which inserts linefeeds), can&#039;t see any easy solutions here. 
</p> </div> <div class="secedit editbutton_section editbutton_25"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Linefeed grabbing] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="linefeed_grabbing" /><input type="hidden" name="codeblockOffset" value="56" /><input type="hidden" name="range" value="73689-74916" /><button type="submit" title="Linefeed grabbing">編集</button></div></form></div> <h3 class="sectionedit28" id="lists_tables_quote_issue">Lists / Tables / Quote Issue</h3> <div class="level3"> <p> For list, table and quote syntax, there is a possibility of child syntax eating multiple “lines”. For example a table like; </p> <pre class="code">| Cell A | &lt;sup&gt;Cell B | | Cell C | Cell D&lt;/sup&gt; | | Cell E | Cell F |</pre> <p> Produces; </p> <hr /> <div class="table sectionedit29"><table class="inline"> <tr class="row0"> <td class="col0"> Cell A </td><td class="col1"> <sup>Cell B | | Cell C | Cell D</sup> </td> </tr> <tr class="row1"> <td class="col0"> Cell E </td><td class="col1"> Cell F </td> </tr> </table></div> <div class="secedit editbutton_table editbutton_29"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[テーブル] " /><input type="hidden" name="target" value="table" /><input type="hidden" name="hid" value="table2" /><input type="hidden" name="range" value="75186-75256" /><button type="submit" title="テーブル">編集</button></div></form></div><hr /> <p> Ideally this should be rendered like; </p> <hr /> <div class="table sectionedit30"><table class="inline"> <tr class="row0"> <td class="col0"> Cell A </td><td class="col1"> &lt;sup&gt;Cell B </td> 
</tr> <tr class="row1"> <td class="col0"> Cell C </td><td class="col1"> Cell D&lt;/sup&gt; </td> </tr> <tr class="row2"> <td class="col0"> Cell E </td><td class="col1"> Cell F </td> </tr> </table></div> <div class="secedit editbutton_table editbutton_30"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[テーブル] " /><input type="hidden" name="target" value="table" /><input type="hidden" name="hid" value="table3" /><input type="hidden" name="range" value="75309-75387" /><button type="submit" title="テーブル">編集</button></div></form></div><hr /> <p> i.e. the opening <code>&lt;sup&gt;</code> tag should be ignored because it has no valid closing tag. </p> <p> Fixing this will require using multiple modes inside tables, lists and quotes. </p> </div> <div class="secedit editbutton_section editbutton_28"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Lists / Tables / Quote Issue] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="lists_tables_quote_issue" /><input type="hidden" name="codeblockOffset" value="58" /><input type="hidden" name="range" value="74917-75567" /><button type="submit" title="Lists / Tables / Quote Issue">編集</button></div></form></div> <h3 class="sectionedit31" id="footnotes_and_blocks">Footnotes and blocks</h3> <div class="level3"> <p> Inside footnotes paragraph blocks are ignored and the equivalent of a <code>&lt;br/&gt;</code> instruction is used instead, to replace linefeeds. This is basically a result of the <code>Doku_Handler_Block</code> being awkward to maintain. 
Further to this, if a table, list, quote or horizontal rule is used inside a footnote, it <em>will</em> trigger a paragraph. </p> <p> This should be fixed by modifying <code>Doku_Handler_Block</code> but recommend an overhaul of the design before doing so. </p> </div> <div class="secedit editbutton_section editbutton_31"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Footnotes and blocks] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="footnotes_and_blocks" /><input type="hidden" name="codeblockOffset" value="59" /><input type="hidden" name="range" value="75568-76054" /><button type="submit" title="Footnotes and blocks">編集</button></div></form></div> <h3 class="sectionedit32" id="headers">Headers</h3> <div class="level3"> <p> Currently headers can reside on the same line as other preceding text. This is a knock on effect from the “Linefeed grabbing” issue described above and would require some kind of “pre parse” to fix it. 
For example; </p> <pre class="code">Before the header Some text == Header == After the header</pre> <p> If the behaviour is to be the same as the original DokuWiki parser, this should really be interpreted as; </p> <hr /> <p> Before the header Some text == Header == After the header </p> <hr /> <p> But in fact will result in; </p> <hr /> <p> Before the header Some text </p> </div> <h5 id="header">Header</h5> <div class="level5"> <p> After the header </p> <hr /> </div> <div class="secedit editbutton_section editbutton_32"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Headers] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="headers" /><input type="hidden" name="codeblockOffset" value="59" /><input type="hidden" name="range" value="76055-76644" /><button type="submit" title="Headers">編集</button></div></form></div> <h3 class="sectionedit33" id="block_list_issue">Block / List Issue</h3> <div class="level3"> <p> There is a problem if, before a list there is a blank line with two spaces, the whole including the list will be interpreted as a block: </p> <pre class="code">* list item * list item 2</pre> </div> <div class="secedit editbutton_section editbutton_33"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Block / List Issue] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="block_list_issue" /><input type="hidden" name="codeblockOffset" value="60" /><input type="hidden" name="range" value="76645-76845" /><button type="submit" title="Block / List Issue">編集</button></div></form></div> <h2 class="sectionedit34" 
id="todo">TODO</h2> <div class="level2"> <p> Some things that probably need doing. </p> </div> <div class="secedit editbutton_section editbutton_34"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[TODO] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="todo" /><input type="hidden" name="codeblockOffset" value="60" /><input type="hidden" name="range" value="76846-76902" /><button type="submit" title="TODO">編集</button></div></form></div> <h3 class="sectionedit35" id="more_state_to_state_closing_instructions">More State to State Closing Instructions</h3> <div class="level3"> <p> May be useful, for rendering other formats than XHTML, to add things like the indentation level to closing list instructions, etc. </p> <blockquote><div class="no"> why not just “render” to XML, and then apply some xslt/xml parsers on it?</div></blockquote> </div> <div class="secedit editbutton_section editbutton_35"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[More State to State Closing Instructions] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="more_state_to_state_closing_instructions" /><input type="hidden" name="codeblockOffset" value="60" /><input type="hidden" name="range" value="76903-77163" /><button type="submit" title="More State to State Closing Instructions">編集</button></div></form></div> <h3 class="sectionedit36" id="table_list_quote_sub_modes">Table / List / Quote sub modes</h3> <div class="level3"> <p> Lexer with multiple modes to prevent the issues with nesting states. 
</p> </div> <div class="secedit editbutton_section editbutton_36"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Table / List / Quote sub modes] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="table_list_quote_sub_modes" /><input type="hidden" name="codeblockOffset" value="60" /><input type="hidden" name="range" value="77164-77274" /><button type="submit" title="Table / List / Quote sub modes">編集</button></div></form></div> <h2 class="sectionedit37" id="discussion">Discussion</h2> <div class="level2"> </div> <div class="secedit editbutton_section editbutton_37"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Discussion] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="discussion" /><input type="hidden" name="codeblockOffset" value="60" /><input type="hidden" name="range" value="77275-77298" /><button type="submit" title="Discussion">編集</button></div></form></div> <h3 class="sectionedit38" id="enhance_the_parser_with_tree_structure">Enhance the Parser with Tree Structure</h3> <div class="level3"> <p> The parser is quite simple, because it is only a RegEx and flat list based parser. This makes the parser weak against code errors, and makes it difficult to create correct xhtml, especially in the case of nested code. To enhance the parser, it should generate a tree structure instead of a simple list. This would also make it possible to correct errors in text code, generate correct transitional xhtml code and maybe (only maybe…) save time or memory. 
Many issues with errors could be corrected too (see above, and DokuWiki has problems with big tables). As an example, this makes it possible to send a “p_open” “p_close” blind to the renderer, and the renderer only generates code if there is not already an open or close P tag, and can close tags if they are forgotten, or delete unneeded empty open P tags, or close them before tags like TABLE or H2… (Or better use “New Paragraph Node” instead of P). Even a syntax check and corrections could be possible. A class-based tree implementation can be found here, if the idea is interesting for the developers of DokuWiki (but maybe there are even better implementations): <a href="http://www.phpguru.org/static/tree.html" class="urlextern" title="http://www.phpguru.org/static/tree.html">Tree</a>. MediaWiki uses a tree parser, but with <a href="http://www.phpguru.org/static/tree.html" class="urlextern" title="http://www.phpguru.org/static/tree.html">Tree</a> it is possible to realize this in a simpler, easier way. It is easy to pack this simple code in one php file and implement this inside the inc. For a parser it needs a “nested, from inside out” search algorithm, but this is quickly done. And a tree could even enhance the form.php code, and plugins could use it too. </p> <p> The parsed structure then could look like this, and with this tree it is easy to insert the xhtml tags (Open Close Tags) in a code generator: </p> <pre class="code">TAG-H1 CDATA: text NEWLINE, AMO=2 CDATA: text CDATA: text TAG-B CDATA: text CDATA: text LIST, TYPE=1 CDATA: text LIST CDATA: text CDATA: text LIST CDATA: text TABLE TABLEROW TABLECELL, BIND=2 CDATA: text TABLECELL CDATA: text CDATA: text</pre> <p> But, it would need a heavy and work intensive redesign of the parser. 
A tree parser is much stronger but more difficult to realize…… And the handling of plugins could be difficult too… </p> </div> <div class="secedit editbutton_section editbutton_38"><form class="button btn_secedit" method="post" action="/ja:devel:parser"><div class="no"><input type="hidden" name="do" value="edit" /><input type="hidden" name="rev" value="1362580768" /><input type="hidden" name="summary" value="[Enhance the Parser with Tree Structure] " /><input type="hidden" name="target" value="section" /><input type="hidden" name="hid" value="enhance_the_parser_with_tree_structure" /><input type="hidden" name="codeblockOffset" value="60" /><input type="hidden" name="range" value="77299-" /><button type="submit" title="Enhance the Parser with Tree Structure">編集</button></div></form></div><div class="footnotes"> <div class="fn"><sup><a href="#fnt__1" id="fn__1" class="fn_bot">1)</a></sup> <div class="content"><em class="u">L</em>exer refers to the class <code>Doku_Lexer</code> and the contents of the file <code>inc/parser/lexer.php</code></div></div> <div class="fn"><sup><a href="#fnt__2" id="fn__2" class="fn_bot">2)</a></sup> <div class="content">scan: reading a PHP string from start to end</div></div> <div class="fn"><sup><a href="#fnt__3" id="fn__3" class="fn_bot">3)</a></sup> <div class="content">the term token in this document refers to a regex match, made by the Lexer, and the corresponding method call on the Handler</div></div> <div class="fn"><sup><a href="#fnt__4" id="fn__4" class="fn_bot">4)</a></sup> <div class="content"><em class="u">H</em>andler refers to the class <code>Doku_Handler</code> and the contents of the file <code>inc/parser/handler.php</code></div></div> <div class="fn"><sup><a href="#fnt__5" id="fn__5" class="fn_bot">5)</a></sup> <div class="content">the sequence of instructions is stored in an array called <code>$calls</code>, which is a property of the Handler. 
It is intended for use with <a href="https://secure.php.net/call_user_func_array" class="interwiki iw_phpfn" title="https://secure.php.net/call_user_func_array">call_user_func_array</a></div></div> <div class="fn"><sup><a href="#fnt__6" id="fn__6" class="fn_bot">6)</a></sup> <div class="content"><em class="u">P</em>arser refers to the class <code>Doku_Parser</code> and the contents of the file <code>inc/parser/parser.php</code></div></div> <div class="fn"><sup><a href="#fnt__7" id="fn__7" class="fn_bot">7)</a></sup> <div class="content"><em class="u">R</em>ender refers to some class implementing <code>Doku_Renderer</code> - see <code>inc/parser/renderer.php</code> and <code>inc/parser/xhtml.php</code></div></div> <div class="fn"><sup><a href="#fnt__8" id="fn__8" class="fn_bot">8)</a></sup> <div class="content">The terms “state” and “mode” are used somewhat interchangeably when talking about the Lexer here</div></div> <div class="fn"><sup><a href="#fnt__9" id="fn__9" class="fn_bot">9)</a></sup> <div class="content">The notion of being “badly formed” is not applicable to the DokuWiki parser - it is designed to prevent issues where a user forgets to add the closing tag on some markup by ignoring the markup completely</div></div> <div class="fn"><sup><a href="#fnt__10" id="fn__10" class="fn_bot">10)</a></sup> <div class="content">simply calling it <code>list</code> results in a PHP parse error because <code>list</code> is a PHP keyword - so the parser has to use <code>listblock</code></div></div> </div> <!-- wikipage stop --> </div> <div class="docInfo"><bdi>ja/devel/parser.txt</bdi> · 最終更新: <time datetime="2013-03-06T15:39:28+0100">2013-03-06 15:39</time> by <bdi>Klap-in</bdi></div> <hr class="a11y" /> </div></main><!-- /content --> <!-- PAGE ACTIONS --> <nav id="dokuwiki__pagetools" aria-labelledby="dokuwiki__pagetools__heading"> <h3 class="a11y" id="dokuwiki__pagetools__heading">ページ用ツール</h3> <div class="tools"> <ul> <li class="edit"><a 
href="/ja:devel:parser?do=edit" title="文書の編集 [e]" rel="nofollow" accesskey="e"><span>文書の編集</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M20.71 7.04c.39-.39.39-1.04 0-1.41l-2.34-2.34c-.37-.39-1.02-.39-1.41 0l-1.84 1.83 3.75 3.75M3 17.25V21h3.75L17.81 9.93l-3.75-3.75L3 17.25z"/></svg></a></li><li class="revs"><a href="/ja:devel:parser?do=revisions" title="以前のリビジョン [o]" rel="nofollow" accesskey="o"><span>以前のリビジョン</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M11 7v5.11l4.71 2.79.79-1.28-4-2.37V7m0-5C8.97 2 5.91 3.92 4.27 6.77L2 4.5V11h6.5L5.75 8.25C6.96 5.73 9.5 4 12.5 4a7.5 7.5 0 0 1 7.5 7.5 7.5 7.5 0 0 1-7.5 7.5c-3.27 0-6.03-2.09-7.06-5h-2.1c1.1 4.03 4.77 7 9.16 7 5.24 0 9.5-4.25 9.5-9.5A9.5 9.5 0 0 0 12.5 2z"/></svg></a></li><li class="backlink"><a href="/ja:devel:parser?do=backlink" title="バックリンク" rel="nofollow"><span>バックリンク</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M10.59 13.41c.41.39.41 1.03 0 1.42-.39.39-1.03.39-1.42 0a5.003 5.003 0 0 1 0-7.07l3.54-3.54a5.003 5.003 0 0 1 7.07 0 5.003 5.003 0 0 1 0 7.07l-1.49 1.49c.01-.82-.12-1.64-.4-2.42l.47-.48a2.982 2.982 0 0 0 0-4.24 2.982 2.982 0 0 0-4.24 0l-3.53 3.53a2.982 2.982 0 0 0 0 4.24m2.82-4.24c.39-.39 1.03-.39 1.42 0a5.003 5.003 0 0 1 0 7.07l-3.54 3.54a5.003 5.003 0 0 1-7.07 0 5.003 5.003 0 0 1 0-7.07l1.49-1.49c-.01.82.12 1.64.4 2.43l-.47.47a2.982 2.982 0 0 0 0 4.24 2.982 2.982 0 0 0 4.24 0l3.53-3.53a2.982 2.982 0 0 0 0-4.24.973.973 0 0 1 0-1.42z"/></svg></a></li><li class="top"><a href="#dokuwiki__top" title="文書の先頭へ [t]" rel="nofollow" accesskey="t"><span>文書の先頭へ</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"/></svg></a></li> </ul> </div> </nav> </div><!-- /wrapper -->

<!-- ********** FOOTER ********** -->
<!--
  Site footer: content license notice, badge buttons, imprint link, and the
  site-wide "global DokuWiki links" top bar (markup lives here, but its CSS
  positions it absolutely at the top of the viewport on screen media).
-->
<footer id="dokuwiki__footer"><div class="pad">
  <div class="license">特に明示されていない限り、本Wikiの内容は次のライセンスに従います: <bdi><a href="https://creativecommons.org/licenses/by-sa/4.0/deed.en" rel="license" class="urlextern">CC Attribution-Share Alike 4.0 International</a></bdi></div>

  <!-- Badge buttons. All external targets use explicit https: rather than
       protocol-relative URLs, so they resolve the same way however the page
       is loaded. -->
  <div class="buttons">
    <a href="https://creativecommons.org/licenses/by-sa/4.0/deed.en" rel="license"><img src="/lib/images/license/button/cc-by-sa.png" alt="CC Attribution-Share Alike 4.0 International" /></a>
    <a href="https://www.dokuwiki.org/donate" title="Donate"><img src="/lib/tpl/dokuwiki/images/button-donate.gif" width="80" height="15" alt="Donate" /></a>
    <a href="https://php.net" title="Powered by PHP"><img src="/lib/tpl/dokuwiki/images/button-php.gif" width="80" height="15" alt="Powered by PHP" /></a>
    <a href="https://validator.w3.org/check/referer" title="Valid HTML5"><img src="/lib/tpl/dokuwiki/images/button-html5.png" width="80" height="15" alt="Valid HTML5" /></a>
    <a href="https://jigsaw.w3.org/css-validator/check/referer?profile=css3" title="Valid CSS"><img src="/lib/tpl/dokuwiki/images/button-css.png" width="80" height="15" alt="Valid CSS" /></a>
    <a href="https://dokuwiki.org/" title="Driven by DokuWiki"><img src="/lib/tpl/dokuwiki/images/button-dw.png" width="80" height="15" alt="Driven by DokuWiki" /></a>
  </div>

  <!-- Imprint link: upgraded from http: to https: to avoid an insecure hop
       from an HTTPS page. -->
  <div style="margin-top: 2em; font-size:90%" class="dokuwiki"><div style="float:right"><a href="https://www.splitbrain.org/personal#imprint" style="float:right; text-decoration: none; color:#333">Imprint</a></div></div>

  <!-- Include for DokuWiki site wide top bar -->
  <style>
    @media screen {
      /* Reserve room at the top of the page for the absolutely positioned bar. */
      body {
        padding-top: 30px;
      }

      #global__header {
        position: absolute;
        top: 0;
        left: 0;
        text-align: left;
        vertical-align: middle;
        line-height: 1.5;
        background-color: #333;
        box-shadow: 0 0 8px rgba(0,0,0,0.5);
        width: 100%;
        margin: 0;
        padding: 5px 20px;
        -moz-box-sizing: border-box;
        -webkit-box-sizing: border-box;
        box-sizing: border-box;
        white-space: nowrap;
        overflow: hidden;
      }

      /* Heading is moved off-screen: hidden visually, still in the markup. */
      #global__header h2 {
        position: absolute;
        left: -99999em;
        top: 0;
        overflow: hidden;
        display: inline;
      }

      #global__header ul,
      #global__header li {
        margin: 0;
        padding: 0;
        list-style: none;
        display: inline;
        line-height: 1.5;
      }

      #global__header a {
        color: #bbb;
        text-decoration: none;
        margin-right: 20px;
        font-size: 14px;
        font-weight: normal;
      }

      #global__header a:hover,
      #global__header a:active,
      #global__header a:focus {
        color: #fff;
        text-decoration: underline;
      }

      #global__header form {
        float: right;
        margin: 0 0 0 20px;
      }

      #global__header input {
        background-color: #333;
        background-image: none;
        border: 1px solid #bbb;
        color: #fff;
        box-shadow: none;
        border-radius: 2px;
        margin: 0;
        line-height: normal;
        padding: 1px 0 1px 0;
        height: auto;
      }

      #global__header input.button {
        border: none;
        color: #bbb;
      }

      #global__header input.button:hover,
      #global__header input.button:active,
      #global__header input.button:focus {
        color: #fff;
        text-decoration: underline;
      }
    } /* /@media */

    @media only screen and (min-width: 601px) {
      /* changes specific for www.dokuwiki.org */
      #dokuwiki__header {
        padding-top: 3em;
      }

      #dokuwiki__usertools {
        top: 3em;
      }

      /* changes specific for bugs.dokuwiki.org */
      div#container div#showtask {
        top: 40px;
      }
    } /* /@media */

    @media only screen and (max-width: 600px) {
      /* Narrow screens: the bar flows with the document instead of overlaying it. */
      body {
        padding-top: 0;
      }

      #global__header {
        position: static;
        white-space: normal;
        overflow: auto;
      }

      #global__header form {
        float: none;
        display: block;
        margin: 0 0 .4em;
      }
    } /* /@media */

    @media print {
      #global__header {
        display: none;
      }
    } /* /@media */
  </style>

  <!-- The bar's text is English while the document is lang="ja"; lang="en"
       lets assistive technology pick the right pronunciation. -->
  <div id="global__header" lang="en">
    <h2>Global DokuWiki Links</h2>

    <form method="get" action="https://search.dokuwiki.org/" target="_top">
      <!-- aria-label gives the field an accessible name; title alone is not
           reliably exposed as a label. -->
      <input type="text" name="q" title="Search all DokuWiki sites at once" aria-label="Search all DokuWiki sites at once" class="input" />
      <input type="submit" title="Search all DokuWiki sites at once" value="Search" class="button" />
    </form>

    <ul>
      <li><a href="https://download.dokuwiki.org" title="Download the latest release" target="_top">Download</a></li>
      <li><a href="https://www.dokuwiki.org" title="Read the DokuWiki documentation" target="_top">Wiki</a></li>
      <li><a href="https://forum.dokuwiki.org" title="Ask questions in the DokuWiki forum" target="_top">Forum</a></li>
      <li><a href="https://irc.dokuwiki.org" title="Check IRC chat logs or join the chat" target="_top">IRC</a></li>
      <li><a href="https://github.com/splitbrain/dokuwiki/issues" title="Report and track bugs" target="_top">Bugs</a></li>
      <li><a href="https://translate.dokuwiki.org/" title="Help translating the DokuWiki interface" target="_top">Translate</a></li>
      <li><a href="https://github.com/splitbrain/dokuwiki" title="Access the most recent git commits" target="_top">Git</a></li>
      <li><a href="https://xref.dokuwiki.org/reference/dokuwiki/" title="Cross-Reference of the DokuWiki source code" target="_top">XRef</a></li>
      <li><a href="https://codesearch.dokuwiki.org/" title="Search through the sources of DokuWiki, plugins and templates" target="_top">Code Search</a></li>
    </ul>
  </div>
  <!-- end of DokuWiki top bar include -->
</div></footer><!-- /footer -->

</div></div><!-- /site -->

<!-- 2x1 image whose request hits taskrunner.php; empty alt marks it as
     non-content for assistive technology. -->
<div class="no"><img src="/lib/exe/taskrunner.php?id=ja%3Adevel%3Aparser&amp;1744257983" width="2" height="1" alt="" /></div>
<div id="screen__mode" class="no"></div></body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10