Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
menu search
person
Welcome To Ask or Share your Answers For Others

Categories

We've got such regexp:

var regexp = /^one (two)+ three/;

So only string like "one two three" or "one two three four" or "one twotwo three" etc. will match it.

However, if we've got string like

"one " - is still 'promising' that maybe soon it will match

but this string: "one three" will never match no matter what we'll do.

Is there some way to check if given string have chances to become matching or not?

I need it for some tips during writing when I want to recommend all options that begins with given input (regexp's I'm using are pretty long and I dont want really to mess with them).


In other words - I want to check if string has ended during checking and nothing 'not matching' was faced.

In even more other words - Answer would be inside reason of not matching. If reason is end of string - then it would be promissing. However I dont know any way to check why some string didnt match

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
118 views
Welcome To Ask or Share your Answers For Others

1 Answer

This is a regex feature known as partial matching, it's available in several regex engines such as PCRE, Boost, Java but not in JavaScript.

Andacious's answer shows a very nice way to overcome this limitation, we just need to automate this.

Well... challenge accepted :)

Fortunately, JavaScript has a very limited regex feature set with a simple syntax, so I wrote a simple parser and on-the-fly transformation for this task, based on the features listed on MDN. This code has been updated to handle ES2018 features.

A couple points of interest:

  • This produces a regex which will almost always match the empty string. Therefore a failed partial match occurs when the result of exec is null or an array whose first element is the empty string
  • Negative lookaheads are kept as-is. I believe that's the right thing to do. The only ways to fail a match is through them (ie put a (?!) in the regex) and anchors (^ and $). Lookbehinds (both positive and negative) are also kept as-is.
  • The parser assumes a valid input pattern: you can't create a RegExp object from an invalid pattern in the first place. This may break in the future if new regex features are introduced.
  • This code won't handle backreferences properly: ^(w+)s+1$ won't yield a partial match against hello hel for instance.

RegExp.prototype.toPartialMatchRegex = function() {
    "use strict";
    
    var re = this,
        source = this.source,
        i = 0;
    
    function process () {
        var result = "",
            tmp;

        function appendRaw(nbChars) {
            result += source.substr(i, nbChars);
            i += nbChars;
        };
        
        function appendOptional(nbChars) {
            result += "(?:" + source.substr(i, nbChars) + "|$)";
            i += nbChars;
        };

        while (i < source.length) {
            switch (source[i])
            {
                case "":
                    switch (source[i + 1])
                    {
                        case "c":
                            appendOptional(3);
                            break;
                            
                        case "x":
                            appendOptional(4);
                            break;
                            
                        case "u":
                            if (re.unicode) {
                                if (source[i + 2] === "{") {
                                    appendOptional(source.indexOf("}", i) - i + 1);
                                } else {
                                    appendOptional(6);
                                }
                            } else {
                                appendOptional(2);
                            }
                            break;

                        case "p":
                        case "P":
                            if (re.unicode) {
                                appendOptional(source.indexOf("}", i) - i + 1);
                            } else {
                                appendOptional(2);
                            }
                            break;

                        case "k":
                            appendOptional(source.indexOf(">", i) - i + 1);
                            break;
                            
                        default:
                            appendOptional(2);
                            break;
                    }
                    break;
                    
                case "[":
                    tmp = /[(?:\.|.)*?]/g;
                    tmp.lastIndex = i;
                    tmp = tmp.exec(source);
                    appendOptional(tmp[0].length);
                    break;
                    
                case "|":
                case "^":
                case "$":
                case "*":
                case "+":
                case "?":
                    appendRaw(1);
                    break;
                    
                case "{":
                    tmp = /{d+,?d*}/g;
                    tmp.lastIndex = i;
                    tmp = tmp.exec(source);
                    if (tmp) {
                        appendRaw(tmp[0].length);
                    } else {
                        appendOptional(1);
                    }
                    break;
                    
                case "(":
                    if (source[i + 1] == "?") {
                        switch (source[i + 2])
                        {
                            case ":":
                                result += "(?:";
                                i += 3;
                                result += process() + "|$)";
                                break;
                                
                            case "=":
                                result += "(?=";
                                i += 3;
                                result += process() + ")";
                                break;
                                
                            case "!":
                                tmp = i;
                                i += 3;
                                process();
                                result += source.substr(tmp, i - tmp);
                                break;

                            case "<":
                                switch (source[i + 3])
                                {
                                    case "=":
                                    case "!":
                                        tmp = i;
                                        i += 4;
                                        process();
                                        result += source.substr(tmp, i - tmp);
                                        break;

                                    default:
                                        appendRaw(source.indexOf(">", i) - i + 1);
                                        result += process() + "|$)";
                                        break;        
                                }
                                break;
                        }
                    } else {
                        appendRaw(1);
                        result += process() + "|$)";
                    }
                    break;
                    
                case ")":
                    ++i;
                    return result;
                    
                default:
                    appendOptional(1);
                    break;
            }
        }
        
        return result;
    }
    
    return new RegExp(process(), this.flags);
};






// Test code
(function() {
    document.write('<span style="display: inline-block; width: 60px;">Regex: </span><input id="re" value="^one (two)+ three"/><br><span style="display: inline-block; width: 60px;">Input: </span><input id="txt" value="one twotw"/><br><pre id="result"></pre>');
    document.close();

    var run = function() {
        var output = document.getElementById("result");
        try
        {
            var regex = new RegExp(document.getElementById("re").value);
            var input = document.getElementById("txt").value;
            var partialMatchRegex = regex.toPartialMatchRegex();
            var result = partialMatchRegex.exec(input);
            var matchType = regex.exec(input) ? "Full match" : result && result[0] ? "Partial match" : "No match";
            output.innerText = partialMatchRegex + "

" + matchType + "
" + JSON.stringify(result);
        }
        catch (e)
        {
            output.innerText = e;
        }
    };

    document.getElementById("re").addEventListener("input", run);
    document.getElementById("txt").addEventListener("input", run);
    run();
}());

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
...