I run into this article:
Performance: Compiled vs. Interpreted Regular Expressions, I modified the sample code to compile 1000 Regex and then run each 500 times to take advantage of precompilation, however even in that case interpreted RegExes run 4 times faster!
This means RegexOptions.Compiled
option is completely useless, actually even worse, it's slower! Big difference was due to JIT, after solving JIT compiled regex in the the following code still performs a little bit slow and doesn't make sense to me but @Jim in the answers provided a much cleaner version which works as expected.
Can anyone explain why this is the case?
Code, taken & modified from the blog post:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace RegExTester
{
class Program
{
static void Main(string[] args)
{
DateTime startTime = DateTime.Now;
for (int i = 0; i < 1000; i++)
{
CheckForMatches("some random text with email address, address@domain200.com" + i.ToString());
}
double msTaken = DateTime.Now.Subtract(startTime).TotalMilliseconds;
Console.WriteLine("Full Run: " + msTaken);
startTime = DateTime.Now;
for (int i = 0; i < 1000; i++)
{
CheckForMatches("some random text with email address, address@domain200.com" + i.ToString());
}
msTaken = DateTime.Now.Subtract(startTime).TotalMilliseconds;
Console.WriteLine("Full Run: " + msTaken);
Console.ReadLine();
}
private static List<Regex> _expressions;
private static object _SyncRoot = new object();
private static List<Regex> GetExpressions()
{
if (_expressions != null)
return _expressions;
lock (_SyncRoot)
{
if (_expressions == null)
{
DateTime startTime = DateTime.Now;
List<Regex> tempExpressions = new List<Regex>();
string regExPattern =
@"^[a-zA-Z0-9]+[a-zA-Z0-9._%-]*@{0}$";
for (int i = 0; i < 2000; i++)
{
tempExpressions.Add(new Regex(
string.Format(regExPattern,
Regex.Escape("domain" + i.ToString() + "." +
(i % 3 == 0 ? ".com" : ".net"))),
RegexOptions.IgnoreCase));// | RegexOptions.Compiled
}
_expressions = new List<Regex>(tempExpressions);
DateTime endTime = DateTime.Now;
double msTaken = endTime.Subtract(startTime).TotalMilliseconds;
Console.WriteLine("Init:" + msTaken);
}
}
return _expressions;
}
static List<Regex> expressions = GetExpressions();
private static void CheckForMatches(string text)
{
DateTime startTime = DateTime.Now;
foreach (Regex e in expressions)
{
bool isMatch = e.IsMatch(text);
}
DateTime endTime = DateTime.Now;
//double msTaken = endTime.Subtract(startTime).TotalMilliseconds;
//Console.WriteLine("Run: " + msTaken);
}
}
}
See Question&Answers more detail:os