Wednesday, May 15, 2013

Quit long-running regular expressions in ColdFusion (aka backtracking)

A problem you might encounter when working with regular expressions is that you can't stop a regex once it is in progress: there is no timeout setting. A pattern that backtracks heavily can therefore potentially freeze your server. The function below uses ColdFusion's relatively new cfthread tag to work around this problem. In the example below the regex is terminated if it runs for more than 3000 milliseconds.
<!---
 timedRegex
 Runs a java.util.regex pattern against a string inside a separate cfthread and
 abandons the attempt if the thread has not finished within the timeout.

 Arguments:
  txt      - the text to search.
  pattern  - the regular expression (Java syntax); it is trimmed before compiling.
  timeout  - optional; milliseconds to wait for the match to finish (default 3000).
             NOTE(review): cfthread join treats a timeout of 0 as "wait until
             the thread finishes" - confirm before passing 0.

 Returns a struct:
  success  - true when matching finished in time, false otherwise (timeout, or
             the thread did not reach COMPLETED, e.g. an invalid pattern).
  found    - array with one struct per match: "string" is the whole match,
             "groups" is an array holding the text of capture groups 1..n.
--->
<cffunction name="timedRegex" access="public" returntype="any" output="false">
 <cfargument name="txt" type="string" required="true">
 <cfargument name="pattern" type="string" required="true">
 <cfargument name="timeout" type="numeric" required="false" default="3000">

 <!--- var-scoped so concurrent requests cannot overwrite each other's result --->
 <cfset var result = structNew()>
 <cfset var threadName = "thread_" & replace(createUUID(), "-", "", "all")>
 <cfset var worker = "">

 <!--- Fallback returned when the thread does not complete in time --->
 <cfset result.success = false>
 <cfset result.found = arrayNew(1)>

 <!--- The thread body only sees what is handed over as attributes, so the
       inputs are passed explicitly and read back from the attributes scope. --->
 <cfthread action="run" name="#threadName#" pattern="#arguments.pattern#" txt="#arguments.txt#">
  <cfset var matcher = createObject("java", "java.util.regex.Pattern").compile(trim(attributes.pattern)).matcher(attributes.txt)>
  <cfset var response = structNew()>
  <cfset var hit = "">
  <cfset var groupText = "">
  <cfset var g = 0>

  <cfset response.found = arrayNew(1)>

  <cfloop condition="matcher.find()">
   <cfset hit = structNew()>
   <cfset hit.string = matcher.group()>
   <cfset hit.groups = arrayNew(1)>
   <cfloop from="1" to="#matcher.groupCount()#" index="g">
    <!--- javaCast picks the group(int) overload unambiguously --->
    <cfset groupText = matcher.group(javaCast("int", g))>
    <!--- A group that did not participate in the match comes back as null;
          store an empty string so the array has no undefined slots. --->
    <cfif isNull(groupText)>
     <cfset groupText = "">
    </cfif>
    <cfset hit.groups[g] = groupText>
   </cfloop>
   <cfset arrayAppend(response.found, hit)>
  </cfloop>

  <cfset response.success = true>
  <!--- Publish the finished result through the thread scope in one assignment --->
  <cfset thread.response = response>
 </cfthread>

 <!--- Wait for the worker, but no longer than the requested timeout --->
 <cfthread action="join" name="#threadName#" timeout="#arguments.timeout#"/>

 <cfset worker = cfthread[threadName]>
 <cfif worker.status eq "COMPLETED">
  <cfreturn worker.response>
 </cfif>

 <!--- Still running (or failed): stop the worker and report failure --->
 <cfthread action="terminate" name="#threadName#"/>
 <cfreturn result>
</cffunction>

<!--- Demo 1: the input ends in "y", so the pattern is able to match. --->
<cfdump var="#timedRegex('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy', '(x+x+)+(y)')#">
<!--- Demo 2: same input without the trailing "y"; expected to backtrack
      heavily and hit the timeout. --->
<cfdump var="#timedRegex('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', '(x+x+)+(y)')#">

No comments: