CINXE.COM

Regular Expression Subroutines

<!DOCTYPE html> <html lang="en"><head><meta charset="utf-8"><link rel=canonical href='https://www.regular-expressions.info/subroutine.html'><title>Regular Expression Subroutines</title> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="author" content="Jan Goyvaerts"> <meta name="description" content=""> <meta name="keywords" content=""> <link rel=stylesheet href="regex.css" type="text/css"><script src="theme.js" type="text/javascript"></script><link rel="alternate" type="application/rss+xml" title="New at Regular-Expressions.info" href="updates.xml"> </head> <body bgcolor=white text=black> <div id=top></div> <div id=btntop><div id=btngrid><a href="quickstart.html" target="_top"><div>Quick&nbsp;Start</div></a><a href="tutorial.html" target="_top"><div>Tutorial</div></a><a href="tools.html" target="_top"><div>Tools&nbsp;&amp;&nbsp;Languages</div></a><a href="examples.html" target="_top"><div>Examples</div></a><a href="refflavors.html" target="_top"><div>Reference</div></a><a href="books.html" target="_top"><div>Book&nbsp;Reviews</div></a></div></div> <div id=contents><div id=side> <TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Regex Tutorial</TD></TR><TR><TD><A HREF="tutorial.html" TARGET=_top>Introduction</A></TD></TR><TR><TD><A HREF="tutorialcnt.html" TARGET=_top>Table of Contents</A></TD></TR><TR><TD><A HREF="characters.html" TARGET=_top>Special Characters</A></TD></TR><TR><TD><A HREF="nonprint.html" TARGET=_top>Non-Printable Characters</A></TD></TR><TR><TD><A HREF="engine.html" TARGET=_top>Regex Engine Internals</A></TD></TR><TR><TD><A HREF="charclass.html" TARGET=_top>Character Classes</A></TD></TR><TR><TD><A HREF="charclasssubtract.html" TARGET=_top>Character Class Subtraction</A></TD></TR><TR><TD><A HREF="charclassintersect.html" TARGET=_top>Character Class Intersection</A></TD></TR><TR><TD><A HREF="shorthand.html" TARGET=_top>Shorthand Character Classes</A></TD></TR><TR><TD><A HREF="dot.html" 
TARGET=_top>Dot</A></TD></TR><TR><TD><A HREF="anchors.html" TARGET=_top>Anchors</A></TD></TR><TR><TD><A HREF="wordboundaries.html" TARGET=_top>Word Boundaries</A></TD></TR><TR><TD><A HREF="alternation.html" TARGET=_top>Alternation</A></TD></TR><TR><TD><A HREF="optional.html" TARGET=_top>Optional Items</A></TD></TR><TR><TD><A HREF="repeat.html" TARGET=_top>Repetition</A></TD></TR><TR><TD><A HREF="brackets.html" TARGET=_top>Grouping &amp; Capturing</A></TD></TR><TR><TD><A HREF="backref.html" TARGET=_top>Backreferences</A></TD></TR><TR><TD><A HREF="backref2.html" TARGET=_top>Backreferences, part 2</A></TD></TR><TR><TD><A HREF="named.html" TARGET=_top>Named Groups</A></TD></TR><TR><TD><A HREF="backrefrel.html" TARGET=_top>Relative Backreferences</A></TD></TR><TR><TD><A HREF="branchreset.html" TARGET=_top>Branch Reset Groups</A></TD></TR><TR><TD><A HREF="freespacing.html" TARGET=_top>Free-Spacing &amp; Comments</A></TD></TR><TR><TD><A HREF="unicode.html" TARGET=_top>Unicode</A></TD></TR><TR><TD><A HREF="modifiers.html" TARGET=_top>Mode Modifiers</A></TD></TR><TR><TD><A HREF="atomic.html" TARGET=_top>Atomic Grouping</A></TD></TR><TR><TD><A HREF="possessive.html" TARGET=_top>Possessive Quantifiers</A></TD></TR><TR><TD><A HREF="lookaround.html" TARGET=_top>Lookahead &amp; Lookbehind</A></TD></TR><TR><TD><A HREF="lookaround2.html" TARGET=_top>Lookaround, part 2</A></TD></TR><TR><TD><A HREF="keep.html" TARGET=_top>Keep Text out of The Match</A></TD></TR><TR><TD><A HREF="conditional.html" TARGET=_top>Conditionals</A></TD></TR><TR><TD><A HREF="balancing.html" TARGET=_top>Balancing Groups</A></TD></TR><TR><TD><A HREF="recurse.html" TARGET=_top>Recursion</A></TD></TR><TR><TD><A HREF="subroutine.html" TARGET=_top>Subroutines</A></TD></TR><TR><TD><A HREF="recurseinfinite.html" TARGET=_top>Infinite Recursion</A></TD></TR><TR><TD><A HREF="recurserepeat.html" TARGET=_top>Recursion &amp; Quantifiers</A></TD></TR><TR><TD><A HREF="recursecapture.html" TARGET=_top>Recursion &amp; 
Capturing</A></TD></TR><TR><TD><A HREF="recursebackref.html" TARGET=_top>Recursion &amp; Backreferences</A></TD></TR><TR><TD><A HREF="recursebacktrack.html" TARGET=_top>Recursion &amp; Backtracking</A></TD></TR><TR><TD><A HREF="posixbrackets.html" TARGET=_top>POSIX Bracket Expressions</A></TD></TR><TR><TD><A HREF="zerolength.html" TARGET=_top>Zero-Length Matches</A></TD></TR><TR><TD><A HREF="continue.html" TARGET=_top>Continuing Matches</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>More on This Site</TD></TR><TR><TD><A HREF="index.html" TARGET=_top>Introduction</A></TD></TR><TR><TD><A HREF="quickstart.html" TARGET=_top>Regular Expressions Quick Start</A></TD></TR><TR><TD><A HREF="tutorial.html" TARGET=_top>Regular Expressions Tutorial</A></TD></TR><TR><TD><A HREF="replacetutorial.html" TARGET=_top>Replacement Strings Tutorial</A></TD></TR><TR><TD><A HREF="tools.html" TARGET=_top>Applications and Languages</A></TD></TR><TR><TD><A HREF="examples.html" TARGET=_top>Regular Expressions Examples</A></TD></TR><TR><TD><A HREF="refflavors.html" TARGET=_top>Regular Expressions Reference</A></TD></TR><TR><TD><A HREF="refreplace.html" TARGET=_top>Replacement Strings Reference</A></TD></TR><TR><TD><A HREF="books.html" TARGET=_top>Book Reviews</A></TD></TR><TR><TD><A HREF="print.html" TARGET=_top>Printable PDF</A></TD></TR><TR><TD><A HREF="about.html" TARGET=_top>About This Site</A></TD></TR><TR><TD><A HREF="updates.html" TARGET=_top>RSS Feed &amp; Blog</A></TD></TR></TABLE></DIV><div class=bodytext><div class=topad style="height:130px"><A HREF="https://www.regexbuddy.com/create.html" TARGET="_top"><picture><source media="(max-width: 370px)" srcset="ads/320/rxbtutorial100.png 1x, ads/320/rxbtutorial150.png 1.5x, ads/320/rxbtutorial200.png 2x, ads/320/rxbtutorial250.png 2.5x, ads/320/rxbtutorial300.png 3x, ads/320/rxbtutorial350.png 3.5x, ads/320/rxbtutorial400.png 4x"><source media="(max-width: 500px)" 
srcset="ads/360/rxbtutorial100.png 1x, ads/360/rxbtutorial150.png 1.5x, ads/360/rxbtutorial200.png 2x, ads/360/rxbtutorial250.png 2.5x, ads/360/rxbtutorial300.png 3x, ads/360/rxbtutorial350.png 3.5x, ads/360/rxbtutorial400.png 4x"><source media="(max-width: 660px)" srcset="ads/480/rxbtutorial100.png 1x, ads/480/rxbtutorial150.png 1.5x, ads/480/rxbtutorial200.png 2x, ads/480/rxbtutorial250.png 2.5x, ads/480/rxbtutorial300.png 3x, ads/480/rxbtutorial350.png 3.5x, ads/480/rxbtutorial400.png 4x"><source media="(max-width: 747px)" srcset="ads/640/rxbtutorial100.png 1x, ads/640/rxbtutorial150.png 1.5x, ads/640/rxbtutorial200.png 2x, ads/640/rxbtutorial250.png 2.5x, ads/640/rxbtutorial300.png 3x, ads/640/rxbtutorial350.png 3.5x, ads/640/rxbtutorial400.png 4x"><img src="ads/728/rxbtutorial100.png" srcset="ads/728/rxbtutorial100.png 1x, ads/728/rxbtutorial125.png 1.25x, ads/728/rxbtutorial150.png 1.5x, ads/728/rxbtutorial175.png 1.75x, ads/728/rxbtutorial200.png 2x, ads/728/rxbtutorial250.png 2.5x, ads/728/rxbtutorial300.png 3x, ads/728/rxbtutorial350.png 3.5x, ads/728/rxbtutorial400.png 4x" alt="RegexBuddy—Better than a regular expression tutorial!"></picture></A></div> <div class=bulb><h1>Regular Expression Subroutines</h1><script type="text/javascript">showbulb();</script></div> <p><A HREF="perl.html" TARGET="_top">Perl 5.10</A>, <A HREF="pcre.html" TARGET="_top">PCRE 4.0</A>, and <A HREF="ruby.html" TARGET="_top">Ruby 1.9</A> support regular expression subroutine calls. These are very similar to <A HREF="recurse.html" TARGET="_top">regular expression recursion</A>. Instead of matching the entire regular expression again, a subroutine call only matches the regular expression inside a capturing group. You can make a subroutine call to any capturing group from anywhere in the regex. 
If you place a call inside the group that it calls, you’ll have a recursive capturing group.</p> <p>As with regex recursion, there is a wide variety of syntax that you can use for exactly the same thing. Perl uses <TT CLASS=code><SPAN CLASS="regexspecial">(?1)</SPAN></TT> to call a numbered group, <TT CLASS=code><SPAN CLASS="regexspecial">(?+1)</SPAN></TT> to call the next group, <TT CLASS=code><SPAN CLASS="regexspecial">(?-1)</SPAN></TT> to call the preceding group, and <TT CLASS=code><SPAN CLASS="regexspecial">(?&amp;name)</SPAN></TT> to call a named group. You can use all of these to reference the same group. <TT CLASS=syntax><SPAN CLASS="regexspecial">(?+1)</SPAN><SPAN CLASS="regexnest1">(?'name'</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexspecial">(?-1)</SPAN><SPAN CLASS="regexspecial">(?&amp;name)</SPAN></TT> matches a string that is five letters long and consists only of the first three letters of the alphabet. This regex is exactly the same as <TT CLASS=syntax><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">(?'name'</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN></TT>.</p> <p>PCRE was the first regex engine to support subroutine calls. 
<TT CLASS=syntax><SPAN CLASS="regexnest1">(?P&lt;name&gt;</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexspecial">(?P&gt;name)</SPAN></TT> matches three letters like <TT CLASS=syntax><SPAN CLASS="regexnest1">(?P&lt;name&gt;</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN></TT> does. <TT CLASS=syntax><SPAN CLASS="regexspecial">(?1)</SPAN></TT> is a call to a numbered group and <TT CLASS=syntax><SPAN CLASS="regexspecial">(?P&gt;name)</SPAN></TT> is a call to a named group. The latter is called the “Python syntax” in the PCRE man page. While this syntax mimics the syntax Python uses for <A HREF="python.html" TARGET="_top">named capturing groups</A>, it is a PCRE invention. Python does not support subroutine calls or recursion. PCRE 7.2 added <TT CLASS=syntax><SPAN CLASS="regexspecial">(?+1)</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexspecial">(?-1)</SPAN></TT> for relative calls. PCRE 7.7 added all the syntax used by Perl 5.10 and Ruby 2.0. Recent versions of <A HREF="php.html" TARGET="_top">PHP</A>, <A HREF="delphi.html" TARGET="_top">Delphi</A>, and <A HREF="rlanguage.html" TARGET="_top">R</A> also support all this syntax, as their regex functions are based on PCRE.</p> <p>The syntax used by Ruby 1.9 and later looks more like that of backreferences. 
<TT CLASS=code><SPAN CLASS="regexspecial">\g&lt;1&gt;</SPAN></TT> and <TT CLASS=code><SPAN CLASS="regexspecial">\g'1'</SPAN></TT> call a numbered group, <TT CLASS=code><SPAN CLASS="regexspecial">\g&lt;name&gt;</SPAN></TT> and <TT CLASS=code><SPAN CLASS="regexspecial">\g'name'</SPAN></TT> call a named group, while <TT CLASS=code><SPAN CLASS="regexspecial">\g&lt;-1&gt;</SPAN></TT> and <TT CLASS=code><SPAN CLASS="regexspecial">\g'-1'</SPAN></TT> call the preceding group. Ruby 2.0 adds <TT CLASS=code><SPAN CLASS="regexspecial">\g&lt;+1&gt;</SPAN></TT> and <TT CLASS=code><SPAN CLASS="regexspecial">\g'+1'</SPAN></TT> to call the next group. <TT CLASS=syntax><SPAN CLASS="regexspecial">\g&lt;+1&gt;</SPAN><SPAN CLASS="regexnest1">(?&lt;name&gt;</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\g&lt;1&gt;</SPAN><SPAN CLASS="regexspecial">\g&lt;-1&gt;</SPAN><SPAN CLASS="regexspecial">\g&lt;name&gt;</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexspecial">\g'+1'</SPAN><SPAN CLASS="regexnest1">(?'name'</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\g'1'</SPAN><SPAN CLASS="regexspecial">\g'-1'</SPAN><SPAN CLASS="regexspecial">\g'name'</SPAN></TT> match the same 5-letter string in Ruby 2.0 as the Perl example does in Perl. The syntax with angle brackets and with quotes can be used interchangeably.</p> <p><a href="jgsoft.html#v2">JGsoft V2</a> supports all three sets of syntax. As we’ll see later, there are differences in how Perl, PCRE, and Ruby deal with <A HREF="recursecapture.html" TARGET="_top">capturing</A>, <A HREF="recursebackref.html" TARGET="_top">backreferences</A>, and <A HREF="recursebacktrack.html" TARGET="_top">backtracking</A> during subroutine calls. 
While they copied each other’s syntax, they did not copy each other’s behavior. JGsoft V2, however, copied their syntax and their behavior. So JGsoft V2 has three different ways of doing regex recursion, which you choose by using a different syntax. But these differences do not come into play in the basic examples on this page.</p> <p><A HREF="boost.html" TARGET="_top">Boost</A> 1.42 copied the syntax from Perl but its implementation is marred by bugs, which are still not all fixed in version 1.62. Most significantly, quantifiers other than <tt>*</tt> or <tt>{0,}</tt> cause subroutine calls to misbehave. This is partially fixed in Boost 1.60 which correctly handles <tt>?</tt> and <tt>{0,1}</tt> too.</p> <p>Boost does not support the Ruby syntax for subroutine calls. In Boost <TT CLASS=code><SPAN CLASS="regexspecial">\g&lt;1&gt;</SPAN></TT> is a backreference&mdash;not a subroutine call&mdash;to capturing group 1. So <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">ab</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\g&lt;1&gt;</SPAN></TT> can match <tt class=match>aa</tt> and <tt class=match>bb</tt> but not <tt class=string>ab</tt> or <tt class=string>ba</tt>. In Ruby the same regex would match all four strings. No other flavor discussed in this tutorial uses this syntax for backreferences.</p> <a name="balanced"></a><h2>Matching Balanced Constructs</h2> <p>Recursion into a capturing group is a more flexible way of <a href="recurse.html#balanced">matching balanced constructs</a> than recursion of the whole regex. We can wrap the regex in a capturing group, recurse into the capturing group instead of the whole regex, and add anchors outside the capturing group. 
<TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexplain">b</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">m</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexplain">e</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\z</SPAN></TT> is the generic regex for checking that a string consists entirely of a correctly balanced construct. Again, <TT CLASS=syntax><SPAN CLASS="regexplain">b</SPAN></TT> is what begins the construct, <TT CLASS=syntax><SPAN CLASS="regexplain">m</SPAN></TT> is what can occur in the middle of the construct, and <TT CLASS=syntax><SPAN CLASS="regexplain">e</SPAN></TT> is what can occur at the end of the construct. For correct results, no two of <TT CLASS=syntax><SPAN CLASS="regexplain">b</SPAN></TT>, <TT CLASS=syntax><SPAN CLASS="regexplain">m</SPAN></TT>, and <TT CLASS=syntax><SPAN CLASS="regexplain">e</SPAN></TT> should be able to match the same text. 
You can use an <A HREF="atomic.html" TARGET="_top">atomic group</A> instead of the <A HREF="brackets.html" TARGET="_top">non-capturing group</A> for improved performance: <TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexplain">b</SPAN><SPAN CLASS="regexnest2">(?&gt;</SPAN><SPAN CLASS="regexplain">m</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexplain">e</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\z</SPAN></TT>.</p> <p>Similarly, <TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN><SPAN CLASS="regexplain">o</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexplain">b</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">m</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexplain">e</SPAN><SPAN CLASS="regexplain">o</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexspecial">\z</SPAN></TT> and the optimized <TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN><SPAN CLASS="regexplain">o</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexplain">b</SPAN><SPAN CLASS="regexnest2">(?&gt;</SPAN><SPAN CLASS="regexplain">m</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexplain">e</SPAN><SPAN CLASS="regexplain">o</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexspecial">\z</SPAN></TT> 
match a string that consists of nothing but a sequence of one or more correctly balanced constructs, with possibly other text in between. Here, <TT CLASS=syntax><SPAN CLASS="regexplain">o</SPAN></TT> is what can occur outside the balanced constructs. It will often be the same as <TT CLASS=syntax><SPAN CLASS="regexplain">m</SPAN></TT>. <TT CLASS=syntax><SPAN CLASS="regexplain">o</SPAN></TT> should not be able to match the same text as <TT CLASS=syntax><SPAN CLASS="regexplain">b</SPAN></TT> or <TT CLASS=syntax><SPAN CLASS="regexplain">e</SPAN></TT>.</p> <p><TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexescaped">\(</SPAN><SPAN CLASS="regexnest2">(?&gt;</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccspecial">^</SPAN><SPAN CLASS="regexccliteral">()</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexescaped">\)</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\z</SPAN></TT> matches a string that consists of nothing but a correctly balanced pair of parentheses, possibly with text between them. 
<TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccspecial">^</SPAN><SPAN CLASS="regexccliteral">()</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexescaped">\(</SPAN><SPAN CLASS="regexnest2">(?&gt;</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccspecial">^</SPAN><SPAN CLASS="regexccliteral">()</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexescaped">\)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccspecial">^</SPAN><SPAN CLASS="regexccliteral">()</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexspecial">+</SPAN><SPAN CLASS="regexspecial">\z</SPAN></TT> also allows text before the first opening parenthesis and after the last closing parenthesis in the string.</p> <h2>Matching The Same Construct More Than Once</h2> <p>A regex that needs to match the same kind of construct (but not the exact same text) more than once in different parts of the regex can be shorter and more concise when using subroutine calls. Suppose you need a regex to match patient records like these:</p> <pre> Name: John Doe Born: 17-Jan-1964 Admitted: 30-Jul-2013 Released: 3-Aug-2013 </pre> <p>Further suppose that you need to match the date format rather accurately so the regex can filter out valid records, leaving invalid records for human inspection. 
In most regex flavors you could easily do this with this regex, using <A HREF="freespacing.html" TARGET="_top">free-spacing syntax</A>:</p> <p><TT CLASS=syntax><SPAN CLASS="regexspecial">^</SPAN><SPAN CLASS="regexplain">Name:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexspecial">.</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Born:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">3</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">01</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">12</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">1-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR>        </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">Jan</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Feb</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Mar</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Apr</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">May</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Jun</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Jul</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Aug</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Sep</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Oct</SPAN><SPAN 
CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Nov</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Dec</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR>        </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">19</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">20</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Admitted:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">3</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">01</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">12</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">1-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR>            </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">Jan</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Feb</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Mar</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Apr</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">May</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Jun</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Jul</SPAN><SPAN 
CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Aug</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Sep</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Oct</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Nov</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Dec</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR>            </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">19</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">20</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Released:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">3</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">01</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">12</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">1-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR>            </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">Jan</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Feb</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Mar</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Apr</SPAN><SPAN 
CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">May</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Jun</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Jul</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Aug</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Sep</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Oct</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Nov</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">Dec</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR>            </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest1">(?:</SPAN><SPAN CLASS="regexplain">19</SPAN><SPAN CLASS="regexnest1">|</SPAN><SPAN CLASS="regexplain">20</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">$</SPAN></TT></p> <p>With subroutine calls you can make this regex much shorter, easier to read, and easier to maintain:</p> <p><TT CLASS=syntax><SPAN CLASS="regexspecial">^</SPAN><SPAN CLASS="regexplain">Name:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexspecial">.</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Born:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(?'date'</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">3</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">01</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">12</SPAN><SPAN 
CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">1-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexplain"><BR>                </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">Jan</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Feb</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Mar</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Apr</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">May</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Jun</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Jul</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Aug</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Sep</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Oct</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Nov</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Dec</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexplain"><BR>                </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">19</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">20</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Admitted:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN 
CLASS="regexspecial">\g'date'</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Released:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">\g'date'</SPAN><SPAN CLASS="regexspecial">$</SPAN></TT></p> <a name="define"></a><h2>Separate Subroutine Definitions</h2> <p>In Perl, PCRE, and JGsoft V2, you can take this one step further using the special DEFINE group: <TT CLASS=syntax><SPAN CLASS="regexnest1">(?(DEFINE)</SPAN><SPAN CLASS="regexnest2">(?'subroutine'</SPAN><SPAN CLASS="regexplain">regex</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexnest1">)</SPAN></TT>. While this looks like a <A HREF="conditional.html" TARGET="_top">conditional</A> that references the non-existent group DEFINE containing a single named group “subroutine”, the DEFINE group is a special syntax. The fixed text <TT CLASS=syntax><SPAN CLASS="regexnest1">(?(DEFINE)</SPAN></TT> opens the group. A parenthesis closes the group. This special group tells the regex engine to ignore its contents, other than to parse it for named and numbered capturing groups. You can put as many capturing groups inside the DEFINE group as you like. The DEFINE group itself never matches anything, and never fails to match. It is completely ignored. The regex <TT CLASS=syntax><SPAN CLASS="regexplain">foo</SPAN><SPAN CLASS="regexnest1">(?(DEFINE)</SPAN><SPAN CLASS="regexnest2">(?'subroutine'</SPAN><SPAN CLASS="regexplain">skipped</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain">bar</SPAN></TT> matches <tt class=match>foobar</tt>. 
The DEFINE group is completely superfluous in this regex, as there are no calls to any of the groups inside it.</p> <p>With a DEFINE group, our regex becomes:</p> <p><TT CLASS=syntax><SPAN CLASS="regexnest1">(?(DEFINE)</SPAN><SPAN CLASS="regexnest2">(?'date'</SPAN><SPAN CLASS="regexnest3">(?:</SPAN><SPAN CLASS="regexplain">3</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">01</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">12</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">1-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest3">)</SPAN><SPAN CLASS="regexplain"><BR>                   </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest3">(?:</SPAN><SPAN CLASS="regexplain">Jan</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Feb</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Mar</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Apr</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">May</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Jun</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Jul</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Aug</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Sep</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Oct</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Nov</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN CLASS="regexplain">Dec</SPAN><SPAN CLASS="regexnest3">)</SPAN><SPAN CLASS="regexplain"><BR>                   </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest3">(?:</SPAN><SPAN CLASS="regexplain">19</SPAN><SPAN CLASS="regexnest3">|</SPAN><SPAN 
CLASS="regexplain">20</SPAN><SPAN CLASS="regexnest3">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexspecial">^</SPAN><SPAN CLASS="regexplain">Name:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexspecial">.</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Born:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">(?P&gt;date)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Admitted:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">(?P&gt;date)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Released:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">(?P&gt;date)</SPAN><SPAN CLASS="regexspecial">$</SPAN></TT></p> <h2>Quantifiers On Subroutine Calls</h2> <p>Quantifiers on subroutine calls work just like a <A HREF="recurserepeat.html" TARGET="_top">quantifier on recursion</A>. The call is repeated as many times in sequence as needed to satisfy the quantifier. 
<TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexspecial">{3}</SPAN></TT> matches <tt class=match>abcb</tt> and any other four-letter combination of the first three letters of the alphabet. First the group matches once, and then the call matches three times. This regex is equivalent to <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">{3}</SPAN></TT>.</p> <p>Quantifiers on the group are ignored by the subroutine call. <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">{3}</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN></TT> also matches <tt class=match>abcb</tt>. First, the group matches three times, because it has a quantifier. Then the subroutine call matches once, because it has no quantifier. <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">{3}</SPAN><SPAN CLASS="regexspecial">(?1)</SPAN><SPAN CLASS="regexspecial">{3}</SPAN></TT> matches six letters, such as <tt class=match>abbcab</tt>, because now both the group and the call are repeated 3 times. 
These two regexes are equivalent to <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">{3}</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">{3}</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">abc</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexspecial">{3}</SPAN></TT>.</p> <p>While Ruby does not support subroutine definition groups, it does support subroutine calls to groups that are repeated zero times. <TT CLASS=syntax><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexplain">a</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">{0}</SPAN><SPAN CLASS="regexspecial">\g&lt;1&gt;</SPAN><SPAN CLASS="regexspecial">{3}</SPAN></TT> matches <tt class=match>aaa</tt>. The group itself is skipped because it is repeated zero times. Then the subroutine call matches three times, according to its quantifier. This also works in PCRE 7.7 and later. 
It doesn’t work (reliably) in older versions of PCRE or in any version of Perl because of bugs.</p> <p>The Ruby version of the patient record example can be further cleaned up as:</p> <p><TT CLASS=syntax><SPAN CLASS="regexnest1">(?'date'</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">3</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">01</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccliteral">12</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">1-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexplain"><BR>         </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">Jan</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Feb</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Mar</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Apr</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">May</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Jun</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Jul</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Aug</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Sep</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Oct</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Nov</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">Dec</SPAN><SPAN CLASS="regexnest2">)</SPAN><SPAN CLASS="regexplain"><BR>         </SPAN><SPAN CLASS="regexplain">-</SPAN><SPAN CLASS="regexnest2">(?:</SPAN><SPAN CLASS="regexplain">19</SPAN><SPAN CLASS="regexnest2">|</SPAN><SPAN CLASS="regexplain">20</SPAN><SPAN 
CLASS="regexnest2">)</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccrange">0-9</SPAN><SPAN CLASS="regexccopen">]</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">{0}</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexspecial">^</SPAN><SPAN CLASS="regexplain">Name:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexnest1">(</SPAN><SPAN CLASS="regexspecial">.</SPAN><SPAN CLASS="regexspecial">*</SPAN><SPAN CLASS="regexnest1">)</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Born:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">\g'date'</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Admitted:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">\g'date'</SPAN><SPAN CLASS="regexspecial">\r</SPAN><SPAN CLASS="regexspecial">?</SPAN><SPAN CLASS="regexspecial">\n</SPAN><SPAN CLASS="regexplain"><BR> </SPAN><SPAN CLASS="regexplain">Released:</SPAN><SPAN CLASS="regexescaped">\ </SPAN><SPAN CLASS="regexspecial">\g'date'</SPAN><SPAN CLASS="regexspecial">$</SPAN></TT></p> <div id=cntmobi><p>|&ensp;<a href='quickstart.html'>Quick&nbsp;Start</a>&ensp;|&ensp;<a href='tutorial.html'>Tutorial</a>&ensp;|&ensp;<a href='tools.html'>Tools&nbsp;&amp;&nbsp;Languages</a>&ensp;|&ensp;<a href='examples.html'>Examples</a>&ensp;|&ensp;<a href='refflavors.html'>Reference</a>&ensp;|&ensp;<a href='books.html'>Book&nbsp;Reviews</a>&ensp;|</p><p>|&ensp;<a href='tutorial.html'>Introduction</a>&ensp;|&ensp;<a href='tutorialcnt.html'>Table of Contents</a>&ensp;|&ensp;<a href='characters.html'>Special Characters</a>&ensp;|&ensp;<a href='nonprint.html'>Non-Printable 
Characters</a>&ensp;|&ensp;<a href='engine.html'>Regex Engine Internals</a>&ensp;|&ensp;<a href='charclass.html'>Character Classes</a>&ensp;|&ensp;<a href='charclasssubtract.html'>Character Class Subtraction</a>&ensp;|&ensp;<a href='charclassintersect.html'>Character Class Intersection</a>&ensp;|&ensp;<a href='shorthand.html'>Shorthand Character Classes</a>&ensp;|&ensp;<a href='dot.html'>Dot</a>&ensp;|&ensp;<a href='anchors.html'>Anchors</a>&ensp;|&ensp;<a href='wordboundaries.html'>Word Boundaries</a>&ensp;|&ensp;<a href='alternation.html'>Alternation</a>&ensp;|&ensp;<a href='optional.html'>Optional Items</a>&ensp;|&ensp;<a href='repeat.html'>Repetition</a>&ensp;|&ensp;<a href='brackets.html'>Grouping &amp; Capturing</a>&ensp;|&ensp;<a href='backref.html'>Backreferences</a>&ensp;|&ensp;<a href='backref2.html'>Backreferences, part 2</a>&ensp;|&ensp;<a href='named.html'>Named Groups</a>&ensp;|&ensp;<a href='backrefrel.html'>Relative Backreferences</a>&ensp;|&ensp;<a href='branchreset.html'>Branch Reset Groups</a>&ensp;|&ensp;<a href='freespacing.html'>Free-Spacing &amp; Comments</a>&ensp;|&ensp;<a href='unicode.html'>Unicode</a>&ensp;|&ensp;<a href='modifiers.html'>Mode Modifiers</a>&ensp;|&ensp;<a href='atomic.html'>Atomic Grouping</a>&ensp;|&ensp;<a href='possessive.html'>Possessive Quantifiers</a>&ensp;|&ensp;<a href='lookaround.html'>Lookahead &amp; Lookbehind</a>&ensp;|&ensp;<a href='lookaround2.html'>Lookaround, part 2</a>&ensp;|&ensp;<a href='keep.html'>Keep Text out of The Match</a>&ensp;|&ensp;<a href='conditional.html'>Conditionals</a>&ensp;|&ensp;<a href='balancing.html'>Balancing Groups</a>&ensp;|&ensp;<a href='recurse.html'>Recursion</a>&ensp;|&ensp;<a href='subroutine.html'>Subroutines</a>&ensp;|&ensp;<a href='recurseinfinite.html'>Infinite Recursion</a>&ensp;|&ensp;<a href='recurserepeat.html'>Recursion &amp; Quantifiers</a>&ensp;|&ensp;<a href='recursecapture.html'>Recursion &amp; Capturing</a>&ensp;|&ensp;<a href='recursebackref.html'>Recursion 
&amp; Backreferences</a>&ensp;|&ensp;<a href='recursebacktrack.html'>Recursion &amp; Backtracking</a>&ensp;|&ensp;<a href='posixbrackets.html'>POSIX Bracket Expressions</a>&ensp;|&ensp;<a href='zerolength.html'>Zero-Length Matches</a>&ensp;|&ensp;<a href='continue.html'>Continuing Matches</a>&ensp;|</p></div> <div id=copyright> <P CLASS=copyright>Page URL: <A HREF="https://www.regular-expressions.info/subroutine.html" TARGET="_top">https://www.regular-expressions.info/subroutine.html</A><BR> Page last updated: 21 May 2024<BR> Site last updated: 06 November 2024<BR> Copyright &copy; 2003-2024 Jan Goyvaerts. All rights reserved.</P> </div> </div> </div> </body></html>

Pages: 1 2 3 4 5 6 7 8 9 10