CINXE.COM

Tcl regexp and regsub - Regular Expressions in Tcl

<!DOCTYPE html> <html lang="en"><head><meta charset="utf-8"><link rel=canonical href='https://www.regular-expressions.info/tcl.html'><title>Tcl regexp and regsub - Regular Expressions in Tcl</title> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="author" content="Jan Goyvaerts"> <meta name="description" content="Learn how to use regular expressions with Tcl’s regexp and regsub commands"> <meta name="keywords" content=""> <link rel=stylesheet href="regex.css" type="text/css"><script src="theme.js" type="text/javascript"></script><link rel="alternate" type="application/rss+xml" title="New at Regular-Expressions.info" href="updates.xml"> </head> <body bgcolor=white text=black> <div id=top></div> <div id=btntop><div id=btngrid><a href="quickstart.html" target="_top"><div>Quick&nbsp;Start</div></a><a href="tutorial.html" target="_top"><div>Tutorial</div></a><a href="tools.html" target="_top"><div>Tools&nbsp;&amp;&nbsp;Languages</div></a><a href="examples.html" target="_top"><div>Examples</div></a><a href="refflavors.html" target="_top"><div>Reference</div></a><a href="books.html" target="_top"><div>Book&nbsp;Reviews</div></a></div></div> <div id=contents><div id=side> <TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Regex Tools</TD></TR><TR><TD><A HREF="grep.html" TARGET=_top>grep</A></TD></TR><TR><TD><A HREF="powergrep.html" TARGET=_top>PowerGREP</A></TD></TR><TR><TD><A HREF="regexbuddy.html" TARGET=_top>RegexBuddy</A></TD></TR><TR><TD><A HREF="regexmagic.html" TARGET=_top>RegexMagic</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>General Applications</TD></TR><TR><TD><A HREF="editpadlite.html" TARGET=_top>EditPad Lite</A></TD></TR><TR><TD><A HREF="editpadpro.html" TARGET=_top>EditPad Pro</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Languages &amp; Libraries</TD></TR><TR><TD><A HREF="boost.html" 
TARGET=_top>Boost</A></TD></TR><TR><TD><A HREF="delphi.html" TARGET=_top>Delphi</A></TD></TR><TR><TD><A HREF="gnu.html" TARGET=_top>GNU (Linux)</A></TD></TR><TR><TD><A HREF="groovy.html" TARGET=_top>Groovy</A></TD></TR><TR><TD><A HREF="java.html" TARGET=_top>Java</A></TD></TR><TR><TD><A HREF="javascript.html" TARGET=_top>JavaScript</A></TD></TR><TR><TD><A HREF="dotnet.html" TARGET=_top>.NET</A></TD></TR><TR><TD><A HREF="pcre.html" TARGET=_top>PCRE (C/C++)</A></TD></TR><TR><TD><A HREF="pcre2.html" TARGET=_top>PCRE2 (C/C++)</A></TD></TR><TR><TD><A HREF="perl.html" TARGET=_top>Perl</A></TD></TR><TR><TD><A HREF="php.html" TARGET=_top>PHP</A></TD></TR><TR><TD><A HREF="posix.html" TARGET=_top>POSIX</A></TD></TR><TR><TD><A HREF="powershell.html" TARGET=_top>PowerShell</A></TD></TR><TR><TD><A HREF="python.html" TARGET=_top>Python</A></TD></TR><TR><TD><A HREF="rlanguage.html" TARGET=_top>R</A></TD></TR><TR><TD><A HREF="ruby.html" TARGET=_top>Ruby</A></TD></TR><TR><TD><A HREF="stdregex.html" TARGET=_top>std::regex</A></TD></TR><TR><TD><A HREF="tcl.html" TARGET=_top>Tcl</A></TD></TR><TR><TD><A HREF="vbscript.html" TARGET=_top>VBScript</A></TD></TR><TR><TD><A HREF="vb.html" TARGET=_top>Visual Basic 6</A></TD></TR><TR><TD><A HREF="wxwidgets.html" TARGET=_top>wxWidgets</A></TD></TR><TR><TD><A HREF="xml.html" TARGET=_top>XML Schema</A></TD></TR><TR><TD><A HREF="realbasic.html" TARGET=_top>Xojo</A></TD></TR><TR><TD><A HREF="xpath.html" TARGET=_top>XQuery &amp; XPath</A></TD></TR><TR><TD><A HREF="xregexp.html" TARGET=_top>XRegExp</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Databases</TD></TR><TR><TD><A HREF="mysql.html" TARGET=_top>MySQL</A></TD></TR><TR><TD><A HREF="oracle.html" TARGET=_top>Oracle</A></TD></TR><TR><TD><A HREF="postgresql.html" TARGET=_top>PostgreSQL</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>More on This Site</TD></TR><TR><TD><A HREF="index.html" 
TARGET=_top>Introduction</A></TD></TR><TR><TD><A HREF="quickstart.html" TARGET=_top>Regular Expressions Quick Start</A></TD></TR><TR><TD><A HREF="tutorial.html" TARGET=_top>Regular Expressions Tutorial</A></TD></TR><TR><TD><A HREF="replacetutorial.html" TARGET=_top>Replacement Strings Tutorial</A></TD></TR><TR><TD><A HREF="tools.html" TARGET=_top>Applications and Languages</A></TD></TR><TR><TD><A HREF="examples.html" TARGET=_top>Regular Expressions Examples</A></TD></TR><TR><TD><A HREF="refflavors.html" TARGET=_top>Regular Expressions Reference</A></TD></TR><TR><TD><A HREF="refreplace.html" TARGET=_top>Replacement Strings Reference</A></TD></TR><TR><TD><A HREF="books.html" TARGET=_top>Book Reviews</A></TD></TR><TR><TD><A HREF="print.html" TARGET=_top>Printable PDF</A></TD></TR><TR><TD><A HREF="about.html" TARGET=_top>About This Site</A></TD></TR><TR><TD><A HREF="updates.html" TARGET=_top>RSS Feed &amp; Blog</A></TD></TR></TABLE></DIV><div class=bodytext><div class=topad><A HREF="https://www.regexbuddy.com/tcl.html" TARGET="_top"><picture><source media="(max-width: 370px)" srcset="ads/320/rxbtcl100.png 1x, ads/320/rxbtcl150.png 1.5x, ads/320/rxbtcl200.png 2x, ads/320/rxbtcl250.png 2.5x, ads/320/rxbtcl300.png 3x, ads/320/rxbtcl350.png 3.5x, ads/320/rxbtcl400.png 4x"><source media="(max-width: 500px)" srcset="ads/360/rxbtcl100.png 1x, ads/360/rxbtcl150.png 1.5x, ads/360/rxbtcl200.png 2x, ads/360/rxbtcl250.png 2.5x, ads/360/rxbtcl300.png 3x, ads/360/rxbtcl350.png 3.5x, ads/360/rxbtcl400.png 4x"><source media="(max-width: 660px)" srcset="ads/480/rxbtcl100.png 1x, ads/480/rxbtcl150.png 1.5x, ads/480/rxbtcl200.png 2x, ads/480/rxbtcl250.png 2.5x, ads/480/rxbtcl300.png 3x, ads/480/rxbtcl350.png 3.5x, ads/480/rxbtcl400.png 4x"><source media="(max-width: 747px)" srcset="ads/640/rxbtcl100.png 1x, ads/640/rxbtcl150.png 1.5x, ads/640/rxbtcl200.png 2x, ads/640/rxbtcl250.png 2.5x, ads/640/rxbtcl300.png 3x, ads/640/rxbtcl350.png 3.5x, ads/640/rxbtcl400.png 4x"><img 
src="ads/728/rxbtcl100.png" srcset="ads/728/rxbtcl100.png 1x, ads/728/rxbtcl125.png 1.25x, ads/728/rxbtcl150.png 1.5x, ads/728/rxbtcl175.png 1.75x, ads/728/rxbtcl200.png 2x, ads/728/rxbtcl250.png 2.5x, ads/728/rxbtcl300.png 3x, ads/728/rxbtcl350.png 3.5x, ads/728/rxbtcl400.png 4x" alt="RegexBuddy—The best regex editor and tester for Tcl developers!"></picture></A></div> <div class=bulb><h1>Tcl Has Three Regular Expression Flavors</h1><script type="text/javascript">showbulb();</script></div> <p>Tcl 8.2 and later support three regular expression flavors. The Tcl man pages dub them Basic Regular Expressions (BRE), Extended Regular Expressions (ERE) and Advanced Regular Expressions (ARE). BRE and ERE are mainly for backward compatibility with previous versions of Tcl. These flavors implement the two flavors defined in the <A HREF="posix.html" TARGET="_top">POSIX standard</A>. AREs are new in Tcl 8.2. They’re the default and recommended flavor. This flavor implements the POSIX ERE flavor, with a whole bunch of added features. Most of these features are inspired by similar features in <A HREF="perl.html" TARGET="_top">Perl regular expressions</A>.</p> <p>Tcl’s regular expression support is based on a library developed for Tcl by Henry Spencer. This library has since been used in a number of other programming languages and applications, such as the <A HREF="postgresql.html" TARGET="_top">PostgreSQL database</A> and the <A HREF="wxwidgets.html" TARGET="_top">wxWidgets GUI library</A> for C++. Everything said about Tcl in this <A HREF="tutorial.html" TARGET="_top">regular expressions tutorial</A> applies to any tool that uses Henry Spencer’s Advanced Regular Expressions.</p> <p>There are a number of important differences between Tcl Advanced Regular Expressions and Perl-style regular expressions. 
Tcl uses <TT CLASS=syntax><SPAN CLASS="regexspecial">\m</SPAN></TT>, <TT CLASS=syntax><SPAN CLASS="regexspecial">\M</SPAN></TT>, <TT CLASS=syntax><SPAN CLASS="regexspecial">\y</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexspecial">\Y</SPAN></TT> for <A HREF="wordboundaries.html" TARGET="_top">word boundaries</A>. Perl and most other modern regex flavors use <TT CLASS=syntax><SPAN CLASS="regexspecial">\b</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexspecial">\B</SPAN></TT>. In Tcl, these last two match a backspace and a backslash, respectively.</p> <p><a name="modifiers"></a>Tcl also takes a completely different approach to <A HREF="modifiers.html" TARGET="_top">mode modifiers</A>. The <tt class=code>(?letters)</tt> syntax is the same, but the available mode letters and their meanings are quite different. Instead of adding mode modifiers to the regular expression, you can pass more descriptive switches like <tt>-nocase</tt> to the <tt>regexp</tt> and <tt>regsub</tt> commands for some of the modes. Mode modifier spans in the style of <tt class=code>(?modes:regex)</tt> are not supported. Mode modifiers must appear at the start of the regex. They affect the whole regex. Mode modifiers in the regex override command switches. Tcl supports these modes:</p> <ul> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">i</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-nocase</tt> makes the regex match case insensitive.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">c</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> makes the regex match case sensitive. 
This mode is the default.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">x</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-expanded</tt> activates the <A HREF="freespacing.html" TARGET="_top">free-spacing regexp syntax</A>.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">t</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> disables the <A HREF="freespacing.html" TARGET="_top">free-spacing regexp syntax</A>. This mode is the default. The “t” stands for “tight”, the opposite of “expanded”.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">b</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> tells Tcl to interpret the remainder of the regular expression as a <a href="posix.html#bre">Basic Regular Expression</a>.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">e</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> tells Tcl to interpret the remainder of the regular expression as an <a href="posix.html#ere">Extended Regular Expression</a>.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">q</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> tells Tcl to interpret the remainder of the regular expression as plain text. The “q” stands for “quoted”.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> selects “non-newline-sensitive matching”, which is the default. The “s” stands for “single line”. In this mode, the <A HREF="dot.html" TARGET="_top">dot</A> and <a href="charclass.html#negated">negated character classes</a> match all characters, including newlines. 
The <A HREF="anchors.html" TARGET="_top">caret and dollar</A> match only at the very start and end of the subject string.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">p</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-linestop</tt> enables “partial newline-sensitive matching”. In this mode, the <A HREF="dot.html" TARGET="_top">dot</A> and <a href="charclass.html#negated">negated character classes</a> do not match newlines. The <A HREF="anchors.html" TARGET="_top">caret and dollar</A> match only at the very start and end of the subject string.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">w</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-lineanchor</tt> enables “inverse partial newline-sensitive matching”. The “w” stands for “weird”. (Don’t look at me! I didn’t come up with this.) In this mode, the <A HREF="dot.html" TARGET="_top">dot</A> and <a href="charclass.html#negated">negated character classes</a> match all characters, including newlines. The <A HREF="anchors.html" TARGET="_top">caret and dollar</A> match after and before newlines.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">n</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-line</tt> enables what Tcl calls “newline-sensitive matching”. The <A HREF="dot.html" TARGET="_top">dot</A> and <a href="charclass.html#negated">negated character classes</a> do not match newlines. The <A HREF="anchors.html" TARGET="_top">caret and dollar</A> match after and before newlines. 
Specifying <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">n</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-line</tt> is the same as specifying <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">p</SPAN><SPAN CLASS="regexmeta">w</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> or <tt class=code>-linestop -lineanchor</tt>.</li> <li><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">m</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> is a historical synonym for <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">n</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT>.</li> </ul> <p>If you use regular expressions with Tcl and other programming languages, be careful when dealing with the newline-related matching modes. Tcl’s designers found Perl’s <tt class=code>/m</tt> and <tt class=code>/s</tt> modes confusing. They are confusing, but at least Perl has only two, and they both affect only one thing. In Perl, <tt class=code>/m</tt> or <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">m</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> enables “multi-line mode”, which makes the <A HREF="anchors.html" TARGET="_top">caret and dollar</A> match after and before newlines. By default, they match at the very start and end of the string only. In Perl, <tt class=code>/s</tt> or <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> enables “single line mode”. This mode makes the <A HREF="dot.html" TARGET="_top">dot</A> match all characters, including line breaks. By default, it doesn’t match line breaks. Perl does not have a mode modifier to exclude line breaks from <a href="charclass.html#negated">negated character classes</a>. 
In Perl, <TT CLASS=syntax><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccspecial">^</SPAN><SPAN CLASS="regexccliteral">a</SPAN><SPAN CLASS="regexccopen">]</SPAN></TT> matches anything except <tt class=string>a</tt>, including newlines. The only way to exclude newlines is to write <TT CLASS=syntax><SPAN CLASS="regexccopen">[</SPAN><SPAN CLASS="regexccspecial">^</SPAN><SPAN CLASS="regexccliteral">a</SPAN><SPAN CLASS="regexccspecial">\n</SPAN><SPAN CLASS="regexccopen">]</SPAN></TT>. Perl’s default matching mode is like Tcl’s <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">p</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT>, except for the difference in negated character classes.</p> <p>Why compare Tcl with Perl? Many popular regex flavors such as <A HREF="dotnet.html" TARGET="_top">.NET</A>, <A HREF="java.html" TARGET="_top">Java</A>, <A HREF="pcre.html" TARGET="_top">PCRE</A> and <A HREF="python.html" TARGET="_top">Python</A> support the same <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">m</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> modifiers with the exact same defaults and effects as in Perl. Negated character classes work the same in all these languages and libraries. It’s unfortunate that Tcl didn’t follow Perl’s standard, since Tcl’s four options are just as confusing as Perl’s two options. Together they make a very nice alphabet soup.</p> <p>If you ignore the fact that Tcl’s options affect negated character classes, you can use the following table to translate between Tcl’s newline modes and Perl-style newline modes. Note that the defaults are different. 
If you don’t use any switches, <TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">)</SPAN><SPAN CLASS="regexspecial">.</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexspecial">.</SPAN></TT> are equivalent in Tcl, but not in Perl.</p> <table class=reference> <tr><th>Tcl</th><th>Perl</th><th>Anchors</th><th>Dot</th></tr> <tr><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT> (default)</td><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT></td><td>Start and end of string only</td><td>Any character</td></tr> <tr><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">p</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT></td><td>(default)</td><td>Start and end of string only</td><td>Any character except newlines</td></tr> <tr><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">w</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT></td><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">s</SPAN><SPAN CLASS="regexmeta">m</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT></td><td>Start and end of string, and at newlines</td><td>Any character</td></tr> <tr><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">n</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT></td><td><TT CLASS=syntax><SPAN CLASS="regexmeta">(?</SPAN><SPAN CLASS="regexmeta">m</SPAN><SPAN CLASS="regexmeta">)</SPAN></TT></td><td>Start and end of string, and at newlines</td><td>Any character except newlines</td></tr> </table> <h2>Regular Expressions as Tcl Words</h2> <p>You can insert regular expressions in your Tcl source code either by enclosing them with double quotes (e.g. <tt class=code>&quot;my regexp&quot;</tt>) or by enclosing them with curly braces (e.g. <tt class=code>{my regexp}</tt>). 
Since the braces don’t do any substitution like the quotes, they’re by far the best choice for regular expressions.</p> <p>The only thing you need to worry about is that unescaped braces in the regular expression must be balanced. Escaped braces don’t need to be balanced, but the backslash used to escape the brace remains part of the regular expression. You can easily satisfy these requirements by escaping all braces in your regular expression, except those used as a <a href="repeat.html#limit">quantifier</a>. This way your regex will work as expected, and you don’t need to change it at all when pasting it into your Tcl source code, other than putting a pair of braces around it.</p> <p>The regular expression <TT CLASS=syntax><SPAN CLASS="regexspecial">^</SPAN><SPAN CLASS="regexescaped">\{</SPAN><SPAN CLASS="regexspecial">\d</SPAN><SPAN CLASS="regexspecial">{3}</SPAN><SPAN CLASS="regexescaped">\\</SPAN><SPAN CLASS="regexspecial">$</SPAN></TT> matches a string that consists entirely of an opening brace, three digits and one backslash. In Tcl, this becomes <tt>{^\{\d{3}\\$}</tt>. There’s no doubling of backslashes or any sort of escaping needed, as long as you escape literal braces in the regular expression. <TT CLASS=syntax><SPAN CLASS="regexplain">{</SPAN></TT> and <TT CLASS=syntax><SPAN CLASS="regexescaped">\{</SPAN></TT> are both valid regular expressions to match a single opening brace in a Tcl ARE (and any Perl-style regex flavor, for that matter). Only the latter works correctly in a Tcl literal enclosed with braces.</p> <h2>Finding Regex Matches</h2> <p>In Tcl, you can use the <tt>regexp</tt> command to test if a regular expression matches (part of) a string, and to retrieve the matched part(s). The syntax of the command is:</p> <p><tt><b>regexp</b></tt> <i>?switches? regexp subject ?matchvar? 
?group1var group2var ...?</i></p> <p>Immediately after the <tt>regexp</tt> command, you can place zero or more switches from the list above to indicate how Tcl should apply the regular expression. The only required parameters are the regular expression and the subject string. You can specify a literal regular expression using braces as I just explained. Or, you can reference any string variable holding a regular expression read from a file or user input.</p> <p>If you pass the name of a variable as an additional argument, Tcl stores the part of the string matched by the regular expression into that variable. Tcl does <i>not</i> set the variable to an empty string if the match attempt fails. If the regular expression has capturing groups, you can add additional variable names to capture the text matched by each group. If you specify fewer variables than the regex has capturing groups, the text matched by the additional groups is not stored. If you specify more variables than the regex has capturing groups, the additional variables are set to an empty string if the overall regex match was successful.</p> <p>The <tt>regexp</tt> command returns 1 if (part of) the string could be matched, and zero if there’s no match. The following script applies the regular expression <TT CLASS=syntax><SPAN CLASS="regexplain">my regex</SPAN></TT> case insensitively to the string stored in the variable <tt>subjectstring</tt> and displays the result:</p> <pre><span class=longcode><b>if</b> [ <b>regexp</b> -nocase {my regex} $subjectstring matchresult ] then { <b>puts</b> $matchresult } else { <b>puts</b> &quot;my regex could not match the subject string&quot; }</span></pre> <p>The <tt>regexp</tt> command supports three more switches that aren’t regex mode modifiers. The <tt><b>-all</b></tt> switch causes the command to return a number indicating how many times the regex could be matched. 
The variables storing the regex and group matches will store the last match in the string only.</p> <p>The <tt><b>-inline</b></tt> switch tells the <tt>regexp</tt> command to return an array with the substring matched by the regular expression and all substrings matched by all capturing groups. If you also specify the <tt>-all</tt> switch, the array will contain the first regex match, all the group matches of the first match, then the second regex match, the group matches of the second match, etc.</p> <p>The <tt><b>-start</b></tt> switch must be followed by a number (as a separate Tcl word) that indicates the character offset in the subject string at which Tcl should attempt the match. Everything before the starting position will be invisible to the regex engine. This means that <TT CLASS=syntax><SPAN CLASS="regexspecial">\A</SPAN></TT> will match at the character offset you specify with <tt>-start</tt>, even if that position is not at the start of the string.</p> <h2>Replacing Regex Matches</h2> <p>With the <tt>regsub</tt> command, you can replace regular expression matches in a string.</p> <p><tt><b>regsub</b></tt> <i>?switches? regexp subject replacement ?resultvar?</i></p> <p>Just like the <tt>regexp</tt> command, <tt>regsub</tt> takes zero or more switches followed by a regular expression. It supports the same switches, except for <tt>-inline</tt>. Remember to specify <tt>-all</tt> if you want to replace all matches in the string.</p> <p>The argument after the regexp should be the replacement text. You can specify a literal replacement using the brace syntax, or reference a string variable. The <tt>regsub</tt> command recognizes a few metacharacters in the replacement text. 
You can use <TT CLASS=syntax><SPAN CLASS="regexspecial">\0</SPAN></TT> as a placeholder for the whole regex match, and <TT CLASS=syntax><SPAN CLASS="regexspecial">\1</SPAN></TT> through <TT CLASS=syntax><SPAN CLASS="regexspecial">\9</SPAN></TT> for the text matched by one of the first nine <A HREF="brackets.html" TARGET="_top">capturing groups</A>. You can also use <TT CLASS=syntax><SPAN CLASS="regexspecial">&amp;</SPAN></TT> as a synonym of <TT CLASS=syntax><SPAN CLASS="regexspecial">\0</SPAN></TT>. Note that there’s no backslash in front of the ampersand. <TT CLASS=syntax><SPAN CLASS="regexspecial">&amp;</SPAN></TT> is substituted with the whole regex match, while <TT CLASS=syntax><SPAN CLASS="regexescaped">\&amp;</SPAN></TT> is substituted with a literal ampersand. Use <TT CLASS=syntax><SPAN CLASS="regexescaped">\\</SPAN></TT> to insert a literal backslash. You only need to escape backslashes if they’re followed by a digit, to prevent the combination from being seen as a backreference. Again, to prevent unnecessary duplication of backslashes, you should enclose the replacement text with braces instead of double quotes. The replacement text <TT CLASS=syntax><SPAN CLASS="regexspecial">\1</SPAN></TT> becomes <tt>{\1}</tt> when using braces, and <tt>&quot;\\1&quot;</tt> when using quotes.</p> <p>If you pass a variable reference as the final argument, that variable receives the string with the replacements applied, and <tt>regsub</tt> returns an integer indicating the number of replacements made. Tcl 8.4 and later allow you to omit the final argument. 
In that case <tt>regsub</tt> returns the string with the replacements applied.</p> <div id=cntmobi><p>|&ensp;<a href='quickstart.html'>Quick&nbsp;Start</a>&ensp;|&ensp;<a href='tutorial.html'>Tutorial</a>&ensp;|&ensp;<a href='tools.html'>Tools&nbsp;&amp;&nbsp;Languages</a>&ensp;|&ensp;<a href='examples.html'>Examples</a>&ensp;|&ensp;<a href='refflavors.html'>Reference</a>&ensp;|&ensp;<a href='books.html'>Book&nbsp;Reviews</a>&ensp;|</p><p>|&ensp;<a href='grep.html'>grep</a>&ensp;|&ensp;<a href='powergrep.html'>PowerGREP</a>&ensp;|&ensp;<a href='regexbuddy.html'>RegexBuddy</a>&ensp;|&ensp;<a href='regexmagic.html'>RegexMagic</a>&ensp;|</p><p>|&ensp;<a href='editpadlite.html'>EditPad Lite</a>&ensp;|&ensp;<a href='editpadpro.html'>EditPad Pro</a>&ensp;|</p><p>|&ensp;<a href='boost.html'>Boost</a>&ensp;|&ensp;<a href='delphi.html'>Delphi</a>&ensp;|&ensp;<a href='gnu.html'>GNU (Linux)</a>&ensp;|&ensp;<a href='groovy.html'>Groovy</a>&ensp;|&ensp;<a href='java.html'>Java</a>&ensp;|&ensp;<a href='javascript.html'>JavaScript</a>&ensp;|&ensp;<a href='dotnet.html'>.NET</a>&ensp;|&ensp;<a href='pcre.html'>PCRE (C/C++)</a>&ensp;|&ensp;<a href='pcre2.html'>PCRE2 (C/C++)</a>&ensp;|&ensp;<a href='perl.html'>Perl</a>&ensp;|&ensp;<a href='php.html'>PHP</a>&ensp;|&ensp;<a href='posix.html'>POSIX</a>&ensp;|&ensp;<a href='powershell.html'>PowerShell</a>&ensp;|&ensp;<a href='python.html'>Python</a>&ensp;|&ensp;<a href='rlanguage.html'>R</a>&ensp;|&ensp;<a href='ruby.html'>Ruby</a>&ensp;|&ensp;<a href='stdregex.html'>std::regex</a>&ensp;|&ensp;<a href='tcl.html'>Tcl</a>&ensp;|&ensp;<a href='vbscript.html'>VBScript</a>&ensp;|&ensp;<a href='vb.html'>Visual Basic 6</a>&ensp;|&ensp;<a href='wxwidgets.html'>wxWidgets</a>&ensp;|&ensp;<a href='xml.html'>XML Schema</a>&ensp;|&ensp;<a href='realbasic.html'>Xojo</a>&ensp;|&ensp;<a href='xpath.html'>XQuery &amp; XPath</a>&ensp;|&ensp;<a href='xregexp.html'>XRegExp</a>&ensp;|</p><p>|&ensp;<a href='mysql.html'>MySQL</a>&ensp;|&ensp;<a 
href='oracle.html'>Oracle</a>&ensp;|&ensp;<a href='postgresql.html'>PostgreSQL</a>&ensp;|</p></div> <div id=copyright> <P CLASS=copyright>Page URL: <A HREF="https://www.regular-expressions.info/tcl.html" TARGET="_top">https://www.regular-expressions.info/tcl.html</A><BR> Page last updated: 24 August 2021<BR> Site last updated: 06 November 2024<BR> Copyright &copy; 2003-2024 Jan Goyvaerts. All rights reserved.</P> </div> </div> </div> </body></html>

Pages: 1 2 3 4 5 6 7 8 9 10