CINXE.COM

Delphi Regular Expressions Classes

<!DOCTYPE html> <html lang="en"><head><meta charset="utf-8"><link rel=canonical href='https://www.regular-expressions.info/delphi.html'><title>Delphi Regular Expressions Classes</title> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="author" content="Jan Goyvaerts"> <meta name="description" content=""> <meta name="keywords" content=""> <link rel=stylesheet href="regex.css" type="text/css"><script src="theme.js" type="text/javascript"></script><link rel="alternate" type="application/rss+xml" title="New at Regular-Expressions.info" href="updates.xml"> </head> <body bgcolor=white text=black> <div id=top></div> <div id=btntop><div id=btngrid><a href="quickstart.html" target="_top"><div>Quick&nbsp;Start</div></a><a href="tutorial.html" target="_top"><div>Tutorial</div></a><a href="tools.html" target="_top"><div>Tools&nbsp;&amp;&nbsp;Languages</div></a><a href="examples.html" target="_top"><div>Examples</div></a><a href="refflavors.html" target="_top"><div>Reference</div></a><a href="books.html" target="_top"><div>Book&nbsp;Reviews</div></a></div></div> <div id=contents><div id=side> <TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Regex Tools</TD></TR><TR><TD><A HREF="grep.html" TARGET=_top>grep</A></TD></TR><TR><TD><A HREF="powergrep.html" TARGET=_top>PowerGREP</A></TD></TR><TR><TD><A HREF="regexbuddy.html" TARGET=_top>RegexBuddy</A></TD></TR><TR><TD><A HREF="regexmagic.html" TARGET=_top>RegexMagic</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>General Applications</TD></TR><TR><TD><A HREF="editpadlite.html" TARGET=_top>EditPad Lite</A></TD></TR><TR><TD><A HREF="editpadpro.html" TARGET=_top>EditPad Pro</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Languages &amp; Libraries</TD></TR><TR><TD><A HREF="boost.html" TARGET=_top>Boost</A></TD></TR><TR><TD><A HREF="delphi.html" TARGET=_top>Delphi</A></TD></TR><TR><TD><A 
HREF="gnu.html" TARGET=_top>GNU (Linux)</A></TD></TR><TR><TD><A HREF="groovy.html" TARGET=_top>Groovy</A></TD></TR><TR><TD><A HREF="java.html" TARGET=_top>Java</A></TD></TR><TR><TD><A HREF="javascript.html" TARGET=_top>JavaScript</A></TD></TR><TR><TD><A HREF="dotnet.html" TARGET=_top>.NET</A></TD></TR><TR><TD><A HREF="pcre.html" TARGET=_top>PCRE (C/C++)</A></TD></TR><TR><TD><A HREF="pcre2.html" TARGET=_top>PCRE2 (C/C++)</A></TD></TR><TR><TD><A HREF="perl.html" TARGET=_top>Perl</A></TD></TR><TR><TD><A HREF="php.html" TARGET=_top>PHP</A></TD></TR><TR><TD><A HREF="posix.html" TARGET=_top>POSIX</A></TD></TR><TR><TD><A HREF="powershell.html" TARGET=_top>PowerShell</A></TD></TR><TR><TD><A HREF="python.html" TARGET=_top>Python</A></TD></TR><TR><TD><A HREF="rlanguage.html" TARGET=_top>R</A></TD></TR><TR><TD><A HREF="ruby.html" TARGET=_top>Ruby</A></TD></TR><TR><TD><A HREF="stdregex.html" TARGET=_top>std::regex</A></TD></TR><TR><TD><A HREF="tcl.html" TARGET=_top>Tcl</A></TD></TR><TR><TD><A HREF="vbscript.html" TARGET=_top>VBScript</A></TD></TR><TR><TD><A HREF="vb.html" TARGET=_top>Visual Basic 6</A></TD></TR><TR><TD><A HREF="wxwidgets.html" TARGET=_top>wxWidgets</A></TD></TR><TR><TD><A HREF="xml.html" TARGET=_top>XML Schema</A></TD></TR><TR><TD><A HREF="realbasic.html" TARGET=_top>Xojo</A></TD></TR><TR><TD><A HREF="xpath.html" TARGET=_top>XQuery &amp; XPath</A></TD></TR><TR><TD><A HREF="xregexp.html" TARGET=_top>XRegExp</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>Databases</TD></TR><TR><TD><A HREF="mysql.html" TARGET=_top>MySQL</A></TD></TR><TR><TD><A HREF="oracle.html" TARGET=_top>Oracle</A></TD></TR><TR><TD><A HREF="postgresql.html" TARGET=_top>PostgreSQL</A></TD></TR> </TABLE><TABLE CLASS=side CELLSPACING=0 CELLPADDING=4><TR><TD CLASS=sideheader>More on This Site</TD></TR><TR><TD><A HREF="index.html" TARGET=_top>Introduction</A></TD></TR><TR><TD><A HREF="quickstart.html" TARGET=_top>Regular Expressions Quick 
Start</A></TD></TR><TR><TD><A HREF="tutorial.html" TARGET=_top>Regular Expressions Tutorial</A></TD></TR><TR><TD><A HREF="replacetutorial.html" TARGET=_top>Replacement Strings Tutorial</A></TD></TR><TR><TD><A HREF="tools.html" TARGET=_top>Applications and Languages</A></TD></TR><TR><TD><A HREF="examples.html" TARGET=_top>Regular Expressions Examples</A></TD></TR><TR><TD><A HREF="refflavors.html" TARGET=_top>Regular Expressions Reference</A></TD></TR><TR><TD><A HREF="refreplace.html" TARGET=_top>Replacement Strings Reference</A></TD></TR><TR><TD><A HREF="books.html" TARGET=_top>Book Reviews</A></TD></TR><TR><TD><A HREF="print.html" TARGET=_top>Printable PDF</A></TD></TR><TR><TD><A HREF="about.html" TARGET=_top>About This Site</A></TD></TR><TR><TD><A HREF="updates.html" TARGET=_top>RSS Feed &amp; Blog</A></TD></TR></TABLE></DIV><div class=bodytext><div class=topad><A HREF="https://www.regexbuddy.com/delphi.html" TARGET="_top"><picture><source media="(max-width: 370px)" srcset="ads/320/rxbdelphi100.png 1x, ads/320/rxbdelphi150.png 1.5x, ads/320/rxbdelphi200.png 2x, ads/320/rxbdelphi250.png 2.5x, ads/320/rxbdelphi300.png 3x, ads/320/rxbdelphi350.png 3.5x, ads/320/rxbdelphi400.png 4x"><source media="(max-width: 500px)" srcset="ads/360/rxbdelphi100.png 1x, ads/360/rxbdelphi150.png 1.5x, ads/360/rxbdelphi200.png 2x, ads/360/rxbdelphi250.png 2.5x, ads/360/rxbdelphi300.png 3x, ads/360/rxbdelphi350.png 3.5x, ads/360/rxbdelphi400.png 4x"><source media="(max-width: 660px)" srcset="ads/480/rxbdelphi100.png 1x, ads/480/rxbdelphi150.png 1.5x, ads/480/rxbdelphi200.png 2x, ads/480/rxbdelphi250.png 2.5x, ads/480/rxbdelphi300.png 3x, ads/480/rxbdelphi350.png 3.5x, ads/480/rxbdelphi400.png 4x"><source media="(max-width: 747px)" srcset="ads/640/rxbdelphi100.png 1x, ads/640/rxbdelphi150.png 1.5x, ads/640/rxbdelphi200.png 2x, ads/640/rxbdelphi250.png 2.5x, ads/640/rxbdelphi300.png 3x, ads/640/rxbdelphi350.png 3.5x, ads/640/rxbdelphi400.png 4x"><img src="ads/728/rxbdelphi100.png" 
srcset="ads/728/rxbdelphi100.png 1x, ads/728/rxbdelphi125.png 1.25x, ads/728/rxbdelphi150.png 1.5x, ads/728/rxbdelphi175.png 1.75x, ads/728/rxbdelphi200.png 2x, ads/728/rxbdelphi250.png 2.5x, ads/728/rxbdelphi300.png 3x, ads/728/rxbdelphi350.png 3.5x, ads/728/rxbdelphi400.png 4x" alt="RegexBuddy—The best regex editor and tester for Delphi developers!"></picture></A></div> <div class=bulb><h1>Delphi Regular Expressions Classes</h1><script type="text/javascript">showbulb();</script></div> <p>Delphi XE is the first release of Delphi that has built-in support for regular expressions. In most cases you’ll use the RegularExpressions unit. This unit defines a set of records that mimic the regular expression classes in the .NET framework. Just like in .NET, they allow you to use a regular expression in just one line of code without explicit memory management.</p> <p>Internally the RegularExpressions unit uses the RegularExpressionsCore unit which defines the TPerlRegEx class. TPerlRegEx, developed by the author of this website, is a wrapper around the open source <A HREF="pcre.html" TARGET="_top">PCRE library</A>. Thus both the RegularExpressions and RegularExpressionsCore units use the PCRE regex flavor.</p> <h2>Delphi’s RegularExpressions unit</h2> <p>The RegularExpressions unit defines <tt class=code>TRegEx</tt>, <tt class=code>TMatch</tt>, <tt class=code>TMatchCollection</tt>, <tt class=code>TGroup</tt>, and <tt class=code>TGroupCollection</tt> as records rather than as classes. That means you don’t need to call <tt class=code>Create</tt> and <tt class=code>Free</tt> to allocate and deallocate memory.</p> <p><tt class=code>TRegEx</tt> does have a <b><tt class=code>Create</tt></b> constructor that you can call if you want to use the same regular expression more than once. That way <tt class=code>TRegEx</tt> doesn’t compile the same regex twice. 
If you call the constructor, you can then call any of the non-static methods that do not take the regular expression as a parameter. If you don’t call the constructor, you can only call the static (class) methods that take the regular expression as a parameter. All <tt class=code>TRegEx</tt> methods have static and non-static overloads. Which ones you use solely depends on whether you want to make more than one call to <tt class=code>TRegEx</tt> using the same regular expression.</p> <p>The <b><tt class=code>IsMatch</tt></b> method takes a string and returns True or False indicating whether the regular expression matches (part of) the string.</p> <p>The <b><tt class=code>Match</tt></b> method takes a string and returns a <tt class=code>TMatch</tt> record with the details of the first match. If the match fails, it returns a <tt class=code>TMatch</tt> record with the <tt class=code>Success</tt> property set to <tt class=code>False</tt>. The non-static overload of <tt class=code>Match()</tt> takes an optional starting position and an optional length parameter that you can use to search through only part of the input string.</p> <p>The <b><tt class=code>Matches</tt></b> method takes a string and returns a <tt class=code>TMatchCollection</tt> record. The default <tt class=code>Item[]</tt> property of this record holds a <tt class=code>TMatch</tt> for each match the regular expression found in the string. If there are no matches, the <tt class=code>Count</tt> property of the returned <tt class=code>TMatchCollection</tt> record is zero.</p> <p>Use the <b><tt class=code>Replace</tt></b> method to search-and-replace all matches in a string. You can pass the replacement text as a string using the <A HREF="refreplace.html" TARGET="_top">JGsoft replacement text flavor</A>. Or, you can pass a <tt class=code>TMatchEvaluator</tt> which is nothing more than a method that takes one parameter called <tt class=code>Match</tt> of type <tt class=code>TMatch</tt> and returns a string. 
The string returned by your method is used as a literal replacement string. If you want backreferences in your string to be replaced when using the <tt class=code>TMatchEvaluator</tt> overload, call the <tt class=code>Result</tt> method on the provided <tt class=code>Match</tt> parameter before returning the string.</p> <p>Use the <b><tt class=code>Split</tt></b> method to split a string along its regex matches. The result is returned as a dynamic array of strings. As in .NET, text matched by <A HREF="brackets.html" TARGET="_top">capturing groups</A> in the regular expression is also included in the returned array. If you don’t like this, remove all <A HREF="named.html" TARGET="_top">named capturing groups</A> from your regex and pass the <tt class=code>roExplicitCapture</tt> option to disable numbered capturing groups. The non-static overload of <tt class=code>Split()</tt> takes an optional <tt class=code>Count</tt> parameter to indicate the maximum number of elements that the returned array may have. In other words, the string is split at most <tt class=code>Count-1</tt> times. Capturing group matches are not included in the count. So if your regex has capturing groups, the returned array may have more than <tt class=code>Count</tt> elements. If you pass <tt class=code>Count</tt>, you can pass a second optional parameter to indicate the position in the string at which to start splitting. The part of the string before the starting position is returned unsplit in the first element of the returned array.</p> <p>The <b><tt class=code>TMatch</tt></b> record provides several properties with details about the match. <tt class=code>Success</tt> indicates if a match was found. If this is False, all other properties and methods are invalid. <tt class=code>Value</tt> returns the matched string. <tt class=code>Index</tt> and <tt class=code>Length</tt> indicate the position in the input string and the length of the match. 
<tt class=code>Groups</tt> returns a TGroupCollection record that stores a <tt class=code>TGroup</tt> record in its default <tt class=code>Item[]</tt> property for each capturing group. You can use a numeric index to <tt class=code>Item[]</tt> for numbered capturing groups, and a string index for <A HREF="named.html" TARGET="_top">named capturing groups</A>.</p> <p><b><tt class=code>TMatch</tt></b> also provides two methods. <tt class=code>NextMatch</tt> returns the next match of the regular expression after this one. If your <tt class=code>TMatch</tt> is part of a <tt class=code>TMatchCollection</tt> you should not use <tt class=code>NextMatch</tt> to get the next match but use <tt class=code>TMatchCollection.Item[]</tt> instead, in order to avoid repeating the search. <tt class=code>TMatch.Result</tt> takes one parameter with the replacement text as a string using the <A HREF="refreplace.html" TARGET="_top">JGsoft replacement text flavor</A>. It returns the string that this match would have been replaced with if you had used this replacement text with <tt class=code>TRegEx.Replace</tt>.</p> <p>The <b><tt class=code>TGroup</tt></b> record has <tt class=code>Success</tt>, <tt class=code>Value</tt>, <tt class=code>Index</tt> and <tt class=code>Length</tt> properties that work just like those of the <tt class=code>TMatch</tt>.</p> <p>In Delphi XE5 and prior TRegEx always skips zero-length matches. This was fixed in Delphi XE6. You can make the same fix in XE5 and prior by modifying RegularExpressionsCore.pas to remove the line <tt>State := [preNotEmpty]</tt> from <tt>TPerlRegEx.Create</tt>. This change will also affect code that uses TPerlRegEx directly without setting the <tt>State</tt> property.</p> <h2>Regular Expressions Classes for Older Versions of Delphi</h2> <p>TPerlRegEx has been available long before Embarcadero licensed a copy for inclusion with Delphi XE. 
Depending on your needs, you can download one of two versions for use with Delphi 2010 and earlier.</p> <ul> <li><a href="https://www.regular-expressions.info/download/TPerlRegEx.zip">Download the latest class-based TPerlRegEx</a></li> <li><a href="https://www.regular-expressions.info/download/TPerlRegEx2009.zip">Download the older component-based TPerlRegEx</a></li> </ul> <p>The latest release of TPerlRegEx is fully compatible with the RegularExpressionsCore unit in Delphi XE. For new code written in Delphi 2010 or earlier, using the latest release of TPerlRegEx is strongly recommended. If you later migrate your code to Delphi XE, all you have to do is replace PerlRegEx with RegularExpressionsCore in the uses clause of your units.</p> <p>The older versions of TPerlRegEx are non-visual components. This means you can put TPerlRegEx on the component palette and drop it on a form. The original TPerlRegEx was developed when Borland’s goal was to have a component for everything on the component palette.</p> <p>If you want to migrate from an older version of TPerlRegEx to the latest TPerlRegEx, start with removing any TPerlRegEx components you may have placed on forms or data modules and instantiate the objects at runtime instead. When instantiating at runtime, you no longer need to pass an owner component to the <tt class=code>Create()</tt> constructor. Simply remove the parameter.</p> <p>Some of the property and method names in the original TPerlRegEx were a bit unwieldy. These have been renamed in the latest TPerlRegEx. Essentially, in all identifiers <tt>SubExpression</tt> was replaced with <tt>Group</tt> and <tt>MatchedExpression</tt> was replaced with <tt>Matched</tt>. 
Here is a complete list of the changed identifiers:</p> <table class=reference><tr><th>Old Identifier</th><th>New Identifier</th></tr> <tr><td><tt>StoreSubExpression</tt></td><td><tt>StoreGroups</tt></td></tr> <tr><td><tt>NamedSubExpression</tt></td><td><tt>NamedGroup</tt></td></tr> <tr><td><tt>MatchedExpression</tt></td><td><tt>MatchedText</tt></td></tr> <tr><td><tt>MatchedExpressionLength</tt></td><td><tt>MatchedLength</tt></td></tr> <tr><td><tt>MatchedExpressionOffset</tt></td><td><tt>MatchedOffset</tt></td></tr> <tr><td><tt>SubExpressionCount</tt></td><td><tt>GroupCount</tt></td></tr> <tr><td><tt>SubExpressions</tt></td><td><tt>Groups</tt></td></tr> <tr><td><tt>SubExpressionLengths</tt></td><td><tt>GroupLengths</tt></td></tr> <tr><td><tt>SubExpressionOffsets</tt></td><td><tt>GroupOffsets</tt></td></tr> </table> <p>If you’re using RegexBuddy or RegexMagic to <A HREF="https://www.regexbuddy.com/delphi.html" TARGET="_top">generate Delphi code snippets</A>, set the language to “Delphi (TPerlRegEx)” to use the old identifiers, or to “Delphi XE (Core)” to use the new identifiers, regardless of which (older) version of Delphi you’re actually using.</p> <h2>UTF-8 Versus UTF-16</h2> <p>One thing you need to watch out for is that the TPerlRegEx versions you can download here as well as those included with Delphi XE, XE2, and XE3 use UTF8String properties and all the Offset and Length properties are indexes to those UTF-8 strings. This is because at that time PCRE only supported UTF-8 and using UTF8String avoids repeated conversions. If performance is critical, you should use TPerlRegEx instead of TRegEx with these versions of Delphi. If your data is already UTF-8, you can pass the UTF-8 directly to TPerlRegEx. If your data uses another encoding, you can control when the conversion to UTF-8 happens to avoid repeated conversions of the same data.</p> <p>In Delphi XE4 and XE5 TPerlRegEx has UnicodeString (UTF-16) properties but still returns UTF-8 offsets and lengths. 
In Delphi XE6 the Offset and Length properties were changed to UTF-16 offsets and lengths. This means that code that works with XE3 or XE6 that uses the Offset and Length properties will not work with XE4 and XE5 if your strings contain non-ASCII characters. Delphi XE4 through XE8 and Delphi 10 through 10.2 continued to use the UTF-8 version of PCRE even though PCRE already had native UTF-16 support. This combined with the use of UnicodeString means constant conversions between UTF-16 and UTF-8 which can significantly degrade regex performance, particularly with long subject strings.</p> <p>Delphi 10.3 and later use the UTF-16 version of PCRE on the Windows platform. TRegEx and TPerlRegEx now use UnicodeString for everything, without any conversion to UTF-8. Upgrading from Delphi XE4 or later to 10.3 or later will definitely improve the performance of any code that uses TRegEx or TPerlRegEx. Upgrading from Delphi XE3 or prior will improve performance unless you were doing everything with UTF-8.</p> <h2>Use System.Text.RegularExpressions with Delphi Prism</h2> <p>Delphi Prism was Embarcadero’s variant of the Delphi language specifically developed to target the .NET framework. Delphi Prism lived inside the Visual Studio IDE. It was based entirely on the .NET framework. In Delphi Prism you could simply add the <A HREF="dotnet.html" TARGET="_top">System.Text.RegularExpressions</A> namespace to the uses clause of your units. Then you could access the .NET regex classes such as Regex, Match, and Group. You could use them with Delphi Prism just as they can be used by <A HREF="dotnet.html" TARGET="_top">C# and VB</A> developers.</p> <h2>Use System.Text.RegularExpressions with Delphi for .NET</h2> <p>Delphi 8, 2005, 2006, and 2007 included a Delphi for .NET compiler for developing WinForms and VCL.NET applications. Though Delphi for .NET only supported .NET 1.1 or 2.0, depending on your Delphi version, you could still use .NET’s full regular expression support. 
You only needed to add the <A HREF="dotnet.html" TARGET="_top">System.Text.RegularExpressions</A> namespace to the uses clause of your units to be able to access all the .NET regex classes.</p> <div id=cntmobi><p>|&ensp;<a href='quickstart.html'>Quick&nbsp;Start</a>&ensp;|&ensp;<a href='tutorial.html'>Tutorial</a>&ensp;|&ensp;<a href='tools.html'>Tools&nbsp;&amp;&nbsp;Languages</a>&ensp;|&ensp;<a href='examples.html'>Examples</a>&ensp;|&ensp;<a href='refflavors.html'>Reference</a>&ensp;|&ensp;<a href='books.html'>Book&nbsp;Reviews</a>&ensp;|</p><p>|&ensp;<a href='grep.html'>grep</a>&ensp;|&ensp;<a href='powergrep.html'>PowerGREP</a>&ensp;|&ensp;<a href='regexbuddy.html'>RegexBuddy</a>&ensp;|&ensp;<a href='regexmagic.html'>RegexMagic</a>&ensp;|</p><p>|&ensp;<a href='editpadlite.html'>EditPad Lite</a>&ensp;|&ensp;<a href='editpadpro.html'>EditPad Pro</a>&ensp;|</p><p>|&ensp;<a href='boost.html'>Boost</a>&ensp;|&ensp;<a href='delphi.html'>Delphi</a>&ensp;|&ensp;<a href='gnu.html'>GNU (Linux)</a>&ensp;|&ensp;<a href='groovy.html'>Groovy</a>&ensp;|&ensp;<a href='java.html'>Java</a>&ensp;|&ensp;<a href='javascript.html'>JavaScript</a>&ensp;|&ensp;<a href='dotnet.html'>.NET</a>&ensp;|&ensp;<a href='pcre.html'>PCRE (C/C++)</a>&ensp;|&ensp;<a href='pcre2.html'>PCRE2 (C/C++)</a>&ensp;|&ensp;<a href='perl.html'>Perl</a>&ensp;|&ensp;<a href='php.html'>PHP</a>&ensp;|&ensp;<a href='posix.html'>POSIX</a>&ensp;|&ensp;<a href='powershell.html'>PowerShell</a>&ensp;|&ensp;<a href='python.html'>Python</a>&ensp;|&ensp;<a href='rlanguage.html'>R</a>&ensp;|&ensp;<a href='ruby.html'>Ruby</a>&ensp;|&ensp;<a href='stdregex.html'>std::regex</a>&ensp;|&ensp;<a href='tcl.html'>Tcl</a>&ensp;|&ensp;<a href='vbscript.html'>VBScript</a>&ensp;|&ensp;<a href='vb.html'>Visual Basic 6</a>&ensp;|&ensp;<a href='wxwidgets.html'>wxWidgets</a>&ensp;|&ensp;<a href='xml.html'>XML Schema</a>&ensp;|&ensp;<a href='realbasic.html'>Xojo</a>&ensp;|&ensp;<a href='xpath.html'>XQuery &amp; XPath</a>&ensp;|&ensp;<a 
href='xregexp.html'>XRegExp</a>&ensp;|</p><p>|&ensp;<a href='mysql.html'>MySQL</a>&ensp;|&ensp;<a href='oracle.html'>Oracle</a>&ensp;|&ensp;<a href='postgresql.html'>PostgreSQL</a>&ensp;|</p></div> <div id=copyright> <P CLASS=copyright>Page URL: <A HREF="https://www.regular-expressions.info/delphi.html" TARGET="_top">https://www.regular-expressions.info/delphi.html</A><BR> Page last updated: 24 August 2021<BR> Site last updated: 06 November 2024<BR> Copyright &copy; 2003-2024 Jan Goyvaerts. All rights reserved.</P> </div> </div> </div> </body></html>

Pages: 1 2 3 4 5 6 7 8 9 10