CINXE.COM

XQuery and XPath Regular Expressions

<!DOCTYPE html> <html lang="en"><head><meta charset="utf-8"><link rel=canonical href='https://www.regular-expressions.info/xpath.html'><title>XQuery and XPath Regular Expressions</title> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="author" content="Jan Goyvaerts"> <meta name="description" content="Explanation of the regular expression flavor used by the XQuery and XPath functions fn:matches, fn:replace and fn:tokenize."> <meta name="keywords" content=""> <link rel=stylesheet href="regex.css" type="text/css"><script src="theme.js" type="text/javascript"></script><link rel="alternate" type="application/rss+xml" title="New at Regular-Expressions.info" href="updates.xml"> </head> <body bgcolor=white text=black> <div id=top></div> <div id=btntop><div id=btngrid><a href="quickstart.html" target="_top"><div>Quick&nbsp;Start</div></a><a href="tutorial.html" target="_top"><div>Tutorial</div></a><a href="tools.html" target="_top"><div>Tools&nbsp;&amp;&nbsp;Languages</div></a><a href="examples.html" target="_top"><div>Examples</div></a><a href="refflavors.html" target="_top"><div>Reference</div></a><a href="books.html" target="_top"><div>Book&nbsp;Reviews</div></a></div></div> <div id=contents><div id=side> <TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Regex Tools</TD></TR><TR><TD><A HREF="grep.html" TARGET=_top>grep</A></TD></TR><TR><TD><A HREF="powergrep.html" TARGET=_top>PowerGREP</A></TD></TR><TR><TD><A HREF="regexbuddy.html" TARGET=_top>RegexBuddy</A></TD></TR><TR><TD><A HREF="regexmagic.html" TARGET=_top>RegexMagic</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>General Applications</TD></TR><TR><TD><A HREF="editpadlite.html" TARGET=_top>EditPad Lite</A></TD></TR><TR><TD><A HREF="editpadpro.html" TARGET=_top>EditPad Pro</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Languages &amp; Libraries</TD></TR><TR><TD><A 
HREF="boost.html" TARGET=_top>Boost</A></TD></TR><TR><TD><A HREF="delphi.html" TARGET=_top>Delphi</A></TD></TR><TR><TD><A HREF="gnu.html" TARGET=_top>GNU (Linux)</A></TD></TR><TR><TD><A HREF="groovy.html" TARGET=_top>Groovy</A></TD></TR><TR><TD><A HREF="java.html" TARGET=_top>Java</A></TD></TR><TR><TD><A HREF="javascript.html" TARGET=_top>JavaScript</A></TD></TR><TR><TD><A HREF="dotnet.html" TARGET=_top>.NET</A></TD></TR><TR><TD><A HREF="pcre.html" TARGET=_top>PCRE (C/C++)</A></TD></TR><TR><TD><A HREF="pcre2.html" TARGET=_top>PCRE2 (C/C++)</A></TD></TR><TR><TD><A HREF="perl.html" TARGET=_top>Perl</A></TD></TR><TR><TD><A HREF="php.html" TARGET=_top>PHP</A></TD></TR><TR><TD><A HREF="posix.html" TARGET=_top>POSIX</A></TD></TR><TR><TD><A HREF="powershell.html" TARGET=_top>PowerShell</A></TD></TR><TR><TD><A HREF="python.html" TARGET=_top>Python</A></TD></TR><TR><TD><A HREF="rlanguage.html" TARGET=_top>R</A></TD></TR><TR><TD><A HREF="ruby.html" TARGET=_top>Ruby</A></TD></TR><TR><TD><A HREF="stdregex.html" TARGET=_top>std::regex</A></TD></TR><TR><TD><A HREF="tcl.html" TARGET=_top>Tcl</A></TD></TR><TR><TD><A HREF="vbscript.html" TARGET=_top>VBScript</A></TD></TR><TR><TD><A HREF="vb.html" TARGET=_top>Visual Basic 6</A></TD></TR><TR><TD><A HREF="wxwidgets.html" TARGET=_top>wxWidgets</A></TD></TR><TR><TD><A HREF="xml.html" TARGET=_top>XML Schema</A></TD></TR><TR><TD><A HREF="realbasic.html" TARGET=_top>Xojo</A></TD></TR><TR><TD><A HREF="xpath.html" TARGET=_top>XQuery &amp; XPath</A></TD></TR><TR><TD><A HREF="xregexp.html" TARGET=_top>XRegExp</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Databases</TD></TR><TR><TD><A HREF="mysql.html" TARGET=_top>MySQL</A></TD></TR><TR><TD><A HREF="oracle.html" TARGET=_top>Oracle</A></TD></TR><TR><TD><A HREF="postgresql.html" TARGET=_top>PostgreSQL</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>More on This Site</TD></TR><TR><TD><A HREF="index.html" 
TARGET=_top>Introduction</A></TD></TR><TR><TD><A HREF="quickstart.html" TARGET=_top>Regular Expressions Quick Start</A></TD></TR><TR><TD><A HREF="tutorial.html" TARGET=_top>Regular Expressions Tutorial</A></TD></TR><TR><TD><A HREF="replacetutorial.html" TARGET=_top>Replacement Strings Tutorial</A></TD></TR><TR><TD><A HREF="tools.html" TARGET=_top>Applications and Languages</A></TD></TR><TR><TD><A HREF="examples.html" TARGET=_top>Regular Expressions Examples</A></TD></TR><TR><TD><A HREF="refflavors.html" TARGET=_top>Regular Expressions Reference</A></TD></TR><TR><TD><A HREF="refreplace.html" TARGET=_top>Replacement Strings Reference</A></TD></TR><TR><TD><A HREF="books.html" TARGET=_top>Book Reviews</A></TD></TR><TR><TD><A HREF="print.html" TARGET=_top>Printable PDF</A></TD></TR><TR><TD><A HREF="about.html" TARGET=_top>About This Site</A></TD></TR><TR><TD><A HREF="updates.html" TARGET=_top>RSS Feed &amp; Blog</A></TD></TR></TABLE></DIV><div class=bodytext><div class=topad><A HREF="https://www.regexbuddy.com/xpath.html" TARGET="_top"><picture><source media="(max-width: 370px)" srcset="ads/320/rxbxpath100.png 1x, ads/320/rxbxpath150.png 1.5x, ads/320/rxbxpath200.png 2x, ads/320/rxbxpath250.png 2.5x, ads/320/rxbxpath300.png 3x, ads/320/rxbxpath350.png 3.5x, ads/320/rxbxpath400.png 4x"><source media="(max-width: 500px)" srcset="ads/360/rxbxpath100.png 1x, ads/360/rxbxpath150.png 1.5x, ads/360/rxbxpath200.png 2x, ads/360/rxbxpath250.png 2.5x, ads/360/rxbxpath300.png 3x, ads/360/rxbxpath350.png 3.5x, ads/360/rxbxpath400.png 4x"><source media="(max-width: 660px)" srcset="ads/480/rxbxpath100.png 1x, ads/480/rxbxpath150.png 1.5x, ads/480/rxbxpath200.png 2x, ads/480/rxbxpath250.png 2.5x, ads/480/rxbxpath300.png 3x, ads/480/rxbxpath350.png 3.5x, ads/480/rxbxpath400.png 4x"><source media="(max-width: 747px)" srcset="ads/640/rxbxpath100.png 1x, ads/640/rxbxpath150.png 1.5x, ads/640/rxbxpath200.png 2x, ads/640/rxbxpath250.png 2.5x, ads/640/rxbxpath300.png 3x, 
ads/640/rxbxpath350.png 3.5x, ads/640/rxbxpath400.png 4x"><img src="ads/728/rxbxpath100.png" srcset="ads/728/rxbxpath100.png 1x, ads/728/rxbxpath125.png 1.25x, ads/728/rxbxpath150.png 1.5x, ads/728/rxbxpath175.png 1.75x, ads/728/rxbxpath200.png 2x, ads/728/rxbxpath250.png 2.5x, ads/728/rxbxpath300.png 3x, ads/728/rxbxpath350.png 3.5x, ads/728/rxbxpath400.png 4x" alt="RegexBuddy—The best regex editor and tester for XPath developers!"></picture></A></div> <div class=bulb><h1>XQuery and XPath Regular Expressions</h1><script type="text/javascript">showbulb();</script></div> <p>The W3C standard for XQuery 1.0 and XPath 2.0 Functions and Operators defines three functions <tt>fn:matches</tt>, <tt>fn:replace</tt> and <tt>fn:tokenize</tt> that take a regular expression as one of their parameters. The XQuery and XPath standard introduces a new regular expression flavor for this purpose. This flavor is identical to the <A HREF="xml.html" TARGET="_top">XML Schema</A> flavor, with the addition of several features that are available in many modern regex flavors, but not in the XML Schema flavor. All valid XML Schema regexes are also valid XQuery/XPath regexes. The opposite is not always true.</p> <p>Because the XML Schema flavor is only used for true/false validity tests, these features were eliminated for performance reasons. The XQuery and XPath functions perform more complex regular expression operations, which require a more feature-rich regular expression flavor. That said, the XQuery and XPath regex flavor is still limited by modern standards.</p> <p>XQuery and XPath support the following features on top of the features in the <A HREF="xml.html" TARGET="_top">XML Schema</A> flavor:</p> <ul> <li><TT CLASS=syntax><SPAN CLASS="regexspecial">^</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexspecial">$</SPAN></TT> <A HREF="anchors.html" TARGET="_top">anchors</A> that match at the start or end of the string, or the start or end of a line (see matching modes below). 
These are the only two anchors supported. <li><a href="repeat.html#lazy">Lazy quantifiers</a>, using the familiar question mark syntax. <li><A HREF="brackets.html" TARGET="_top">Backreferences and capturing groups</A>. The XML Schema standard supports grouping, but groups were always non-capturing. XQuery/XPath allows backreferences to be used in the regular expression. <tt>fn:replace</tt> supports backreferences in the <A HREF="refreplace.html" TARGET="_top">replacement text</A> using the <tt>$1</tt> notation. </ul> <p>While XML Schema allows no matching modes at all, the XQuery and XPath functions all accept an optional <tt>flags</tt> parameter to set matching modes. <A HREF="modifiers.html" TARGET="_top">Mode modifiers</A> within the regular expression are not supported. These four matching modes are available:</p> <ul> <li><tt class=code>i</tt> makes the regex match case insensitive.</li> <li><tt class=code>s</tt> enables “single-line mode”. In this mode, the <A HREF="dot.html" TARGET="_top">dot matches newlines</A>.</li> <li><tt class=code>m</tt> enables “multi-line mode”. In this mode, the <A HREF="anchors.html" TARGET="_top">caret and dollar match before and after newlines</A> in the subject string.</li> <li><tt class=code>x</tt> enables “<A HREF="freespacing.html" TARGET="_top">free-spacing mode</A>”. In this mode, whitespace between regex tokens is ignored. Comments are not supported.</li> </ul> <p>The flags are specified as a string with the letters of the modes you want to turn on. E.g. <tt>&quot;ix&quot;</tt> turns on case insensitivity and free-spacing. If you don’t want to set any matching modes, you can pass an empty string for the <tt>flags</tt> parameter, or omit the parameter entirely.</p> <h2>Three Regex Functions</h2> <p><tt><b>fn:matches</b>(subject, pattern, flags)</tt> takes a subject string and a regular expression as input. If the regular expression matches any part of the subject string, the function returns true. 
If it cannot match at all, it returns false. You’ll need to use <A HREF="anchors.html" TARGET="_top">anchors</A> if you only want the function to return true when the regex matches the entire subject string.</p> <p><tt><b>fn:replace</b>(subject, pattern, replacement, flags)</tt> takes a subject string, a regular expression, and a replacement string as input. It returns a new string that is the subject string with all matches of the regex pattern replaced with the replacement text. You can use <tt>$1</tt> to <tt>$99</tt> to re-insert capturing groups into the replacement. <tt>$0</tt> inserts the whole regex match. Literal dollar signs and backslashes in the replacement must be escaped with a backslash.</p> <p><tt>fn:replace</tt> cannot replace zero-length matches. E.g. <tt>fn:replace(&quot;test&quot;, &quot;^&quot;, &quot;prefix&quot;)</tt> will raise an error rather than returning “prefixtest” like regex-based search-and-replace does in most programming languages.</p> <p><tt><b>fn:tokenize</b>(subject, pattern, flags)</tt> is like the “split” function in many programming languages. It returns an array of strings that consists of all the substrings in subject between all the regex matches. The array will not contain the regex matches themselves. 
If the regex matches the first or last character in the subject string, then the first or last string in the resulting array will be an empty string.</p> <p><tt>fn:tokenize</tt> also cannot handle zero-length regular expression matches.</p><div id=cntmobi><p>|&ensp;<a href='quickstart.html'>Quick&nbsp;Start</a>&ensp;|&ensp;<a href='tutorial.html'>Tutorial</a>&ensp;|&ensp;<a href='tools.html'>Tools&nbsp;&amp;&nbsp;Languages</a>&ensp;|&ensp;<a href='examples.html'>Examples</a>&ensp;|&ensp;<a href='refflavors.html'>Reference</a>&ensp;|&ensp;<a href='books.html'>Book&nbsp;Reviews</a>&ensp;|</p><p>|&ensp;<a href='grep.html'>grep</a>&ensp;|&ensp;<a href='powergrep.html'>PowerGREP</a>&ensp;|&ensp;<a href='regexbuddy.html'>RegexBuddy</a>&ensp;|&ensp;<a href='regexmagic.html'>RegexMagic</a>&ensp;|</p><p>|&ensp;<a href='editpadlite.html'>EditPad Lite</a>&ensp;|&ensp;<a href='editpadpro.html'>EditPad Pro</a>&ensp;|</p><p>|&ensp;<a href='boost.html'>Boost</a>&ensp;|&ensp;<a href='delphi.html'>Delphi</a>&ensp;|&ensp;<a href='gnu.html'>GNU (Linux)</a>&ensp;|&ensp;<a href='groovy.html'>Groovy</a>&ensp;|&ensp;<a href='java.html'>Java</a>&ensp;|&ensp;<a href='javascript.html'>JavaScript</a>&ensp;|&ensp;<a href='dotnet.html'>.NET</a>&ensp;|&ensp;<a href='pcre.html'>PCRE (C/C++)</a>&ensp;|&ensp;<a href='pcre2.html'>PCRE2 (C/C++)</a>&ensp;|&ensp;<a href='perl.html'>Perl</a>&ensp;|&ensp;<a href='php.html'>PHP</a>&ensp;|&ensp;<a href='posix.html'>POSIX</a>&ensp;|&ensp;<a href='powershell.html'>PowerShell</a>&ensp;|&ensp;<a href='python.html'>Python</a>&ensp;|&ensp;<a href='rlanguage.html'>R</a>&ensp;|&ensp;<a href='ruby.html'>Ruby</a>&ensp;|&ensp;<a href='stdregex.html'>std::regex</a>&ensp;|&ensp;<a href='tcl.html'>Tcl</a>&ensp;|&ensp;<a href='vbscript.html'>VBScript</a>&ensp;|&ensp;<a href='vb.html'>Visual Basic 6</a>&ensp;|&ensp;<a href='wxwidgets.html'>wxWidgets</a>&ensp;|&ensp;<a href='xml.html'>XML Schema</a>&ensp;|&ensp;<a href='realbasic.html'>Xojo</a>&ensp;|&ensp;<a 
href='xpath.html'>XQuery &amp; XPath</a>&ensp;|&ensp;<a href='xregexp.html'>XRegExp</a>&ensp;|</p><p>|&ensp;<a href='mysql.html'>MySQL</a>&ensp;|&ensp;<a href='oracle.html'>Oracle</a>&ensp;|&ensp;<a href='postgresql.html'>PostgreSQL</a>&ensp;|</p></div> <div id=copyright> <P CLASS=copyright>Page URL: <A HREF="https://www.regular-expressions.info/xpath.html" TARGET="_top">https://www.regular-expressions.info/xpath.html</A><BR> Page last updated: 24 August 2021<BR> Site last updated: 06 November 2024<BR> Copyright &copy; 2003-2024 Jan Goyvaerts. All rights reserved.</P> </div> </div> </div> </body></html>

Pages: 1 2 3 4 5 6 7 8 9 10