RegexpMaker.java :  » IDE-Netbeans » utilities » org » netbeans » modules » search » Java Open Source

Java Open Source » IDE Netbeans » utilities 
utilities » org » netbeans » modules » search » RegexpMaker.java
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common
 * Development and Distribution License("CDDL") (collectively, the
 * "License"). You may not use this file except in compliance with the
 * License. You can obtain a copy of the License at
 * http://www.netbeans.org/cddl-gplv2.html
 * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
 * specific language governing permissions and limitations under the
 * License.  When distributing the software, include this License Header
 * Notice in each file and include the License file at
 * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the GPL Version 2 section of the License file that
 * accompanied this code. If applicable, add the following below the
 * License Header, with the fields enclosed by brackets [] replaced by
 * your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * Contributor(s):
 *
 * Portions Copyrighted 2007 Sun Microsystems, Inc.
 */

package org.netbeans.modules.search;

import java.util.regex.Pattern;

/**
 * Translator of <em>simple patterns</em> to regular expressions accepted by
 * {@link java.util.regex.Pattern}.
 * <p>
 * A simple pattern supports only three special characters:
 * <ul>
 *   <li>{@code '*'} &ndash; any sequence of characters (zero or more),</li>
 *   <li>{@code '?'} &ndash; exactly one character,</li>
 *   <li>{@code '\\'} &ndash; quotes the following character, i.e. makes it
 *       literal.</li>
 * </ul>
 * Every other character stands for itself; characters that might have
 * a special meaning in a regular expression are escaped in the output.
 * <p>
 * The class holds no state, all its methods are static.
 *
 * @author  Marian Petras
 */
final class RegexpMaker {

    /** regular expression representing a set of word characters */
    private static final String WORD_CHARS_EXPR
                                = "[\\p{javaLetterOrDigit}_]";          //NOI18N
    /**
     * regular expression representing negative lookbehind
     * for a {@linkplain #WORD_CHARS_EXPR word character}
     */
    private static final String CHECK_NOT_AFTER_WORD_CHAR
                                = "(?<!" + WORD_CHARS_EXPR + ")";       //NOI18N
    /**
     * regular expression representing negative lookahead
     * for a {@linkplain #WORD_CHARS_EXPR word character}
     */
    private static final String CHECK_NOT_BEFORE_WORD_CHAR
                                = "(?!" + WORD_CHARS_EXPR + ")";        //NOI18N

    /**
     * matches simple patterns that need no translation by
     * {@link #makeRegexp(String, boolean)} (compiled once, not on each call)
     */
    private static final Pattern TRIVIAL_PATTERN
                                = Pattern.compile("[a-zA-Z0-9 ]*");     //NOI18N
    /**
     * matches pattern lists that need no translation by
     * {@link #makeMultiRegexp(String)}; unlike in {@link #TRIVIAL_PATTERN},
     * space is not allowed here because it is a list separator
     */
    private static final Pattern TRIVIAL_PATTERN_LIST
                                = Pattern.compile("[a-zA-Z0-9]*");      //NOI18N

    private RegexpMaker() {
    }

    /**
     * Translates the given simple pattern to a regular expression,
     * with the <i>Whole Words</i> option switched off.
     *
     * @param  simplePattern  pattern to be translated
     * @return  regular expression corresponding to the simple pattern
     */
    static String makeRegexp(String simplePattern) {

        /* This method is currently used only in tests. */

        return makeRegexp(simplePattern, false);
    }

    /**
     * Translates the given simple pattern to a regular expression.
     * <p>
     * Consecutive metacharacters ({@code '*'} and {@code '?'}) are collected
     * and translated as a single unit once the next literal character
     * (or the end of the pattern) is reached. If <i>Whole Words</i> is off,
     * leading and trailing stars are dropped. If it is on, wildcards match
     * word characters only and the expression is guarded by a negative
     * lookbehind/lookahead for a word character where its first/last
     * element is a word character or a {@code '?'}-only wildcard.
     *
     * @param  simplePattern  pattern to be translated
     * @param  wholeWords  whether the <i>Whole Words</i> option is selected
     * @return  regular expression corresponding to the simple pattern
     */
    static String makeRegexp(String simplePattern, boolean wholeWords) {
        if (simplePattern.length() == 0) {              //trivial case
            return simplePattern;
        }

        if (!wholeWords
                && TRIVIAL_PATTERN.matcher(simplePattern).matches()) {
            return simplePattern;                       //trivial case
        }

        StringBuilder buf = new StringBuilder(simplePattern.length() + 16);
        boolean quoted = false;         //previous character was a quoting '\\'
        boolean starPresent = false;    //pending metachars contain a '*'
        int minCount = 0;               //number of pending '?' metachars

        /*
         * Tracks whether any pattern element has been emitted yet. It remains
         * true after a leading star was dropped. The lookbehind guard for
         * a leading '?' is appended before the wildcard itself, so the guard
         * never follows another element.
         */
        boolean bufIsEmpty = true;
        char lastInputChar = '*';       //might be any other non-word character
        for (char c : simplePattern.toCharArray()) {
            if (quoted) {
                /* metachars were flushed when the '\\' was encountered */
                assert !starPresent && (minCount == 0);
                if (wholeWords && bufIsEmpty && isWordChar(c)) {
                    buf.append(CHECK_NOT_AFTER_WORD_CHAR);
                }
                appendEscaped(buf, c);
                lastInputChar = c;
                bufIsEmpty = false;
                quoted = false;
            } else if (c == '?') {
                minCount++;
            } else if (c == '*') {
                starPresent = true;
            } else {
                /* a non-metachar ends the sequence of pending metachars */
                if (starPresent || (minCount != 0)) {
                    if (wholeWords && bufIsEmpty && !starPresent) {
                        buf.append(CHECK_NOT_AFTER_WORD_CHAR);
                    }
                    bufIsEmpty &= !addMetachars(buf, starPresent, minCount,
                                                wholeWords, !bufIsEmpty);
                    starPresent = false;
                    minCount = 0;
                }

                if (c == '\\') {
                    quoted = true;
                } else {
                    if (wholeWords && bufIsEmpty && isWordChar(c)) {
                        buf.append(CHECK_NOT_AFTER_WORD_CHAR);
                    }
                    appendEscaped(buf, c);
                    lastInputChar = c;
                    bufIsEmpty = false;
                }
            }
        }

        if (quoted) {
            /* trailing '\\' quotes nothing - take it as a literal backslash */
            assert !starPresent && (minCount == 0);
            buf.append('\\').append('\\');
            lastInputChar = '\\';
        } else if (starPresent || (minCount != 0)) {
            /* trailing metachars */
            if (wholeWords && !starPresent && bufIsEmpty) {
                buf.append(CHECK_NOT_AFTER_WORD_CHAR);
            }
            addMetachars(buf, starPresent, minCount, wholeWords, false);
            if (wholeWords && !starPresent) {
                buf.append(CHECK_NOT_BEFORE_WORD_CHAR);
            }
            lastInputChar = '*';    //might be any other non-word character
        }
        if (wholeWords && isWordChar(lastInputChar)) {
            buf.append(CHECK_NOT_BEFORE_WORD_CHAR);
        }
        return buf.toString();
    }

    /**
     * Checks whether the given character is a word character.
     * @param  c  character to be checked
     * @return  {@code true} if the character is a word character,
     *          {@code false} otherwise
     * @see  #WORD_CHARS_EXPR
     */
    private static boolean isWordChar(char c) {
        /* not necessary - just for performance */
        if ((c == '*') || (c == '\\')) {
            return false;
        }

        /* guards against changing the expression without updating this method */
        assert WORD_CHARS_EXPR.equals("[\\p{javaLetterOrDigit}_]")      //NOI18N
               : "update implementation of method isWordChar(char)";    //NOI18N
        return (c == '_') || Character.isLetterOrDigit(c);
    }

    /**
     * Appends the given character to the buffer as a literal, preceding it
     * with a backslash unless it is a
     * {@linkplain #isSimpleCharacter(char) simple character}.
     *
     * @param  buf  string buffer to which the character is to be added
     * @param  c  character to be added
     */
    private static void appendEscaped(final StringBuilder buf, final char c) {
        if (!isSimpleCharacter(c)) {
            buf.append('\\');
        }
        buf.append(c);
    }

    /**
     * Generates the part of a regular expression, that represents a sequence
     * of simple expression's metacharacters {@code '*'} and {@code '?'},
     * and adds it to the given string buffer.
     *
     * @param  buf  string buffer to which the new part is to be added
     * @param  starPresent  whether the sequence contained at least one
     *                      {@code '*'} character
     * @param  minCount  number of {@code '?'} characters in the sequence
     * @param  wholeWords  whether the <i>Whole Words</i> option is selected
     * @param  middle  whether the metachars are to be placed in the middle
     *                 (i.e. not in the beginning or at the end) of the search
     *                 expression
     * @return  {@code true} if something was added to the string buffer,
     *          {@code false} if the buffer was not modified
     */
    private static boolean addMetachars(final StringBuilder buf,
                                        final boolean starPresent,
                                        final int minCount,
                                        final boolean wholeWords,
                                        final boolean middle) {
        assert starPresent || (minCount != 0);

        /*
         * If 'Whole Words' is not activated, ignore stars in the beginning
         * and at the end of the expression:
         */
        final boolean unbounded = starPresent && (wholeWords || middle);

        if ((minCount == 0) && !unbounded) {
            return false;
        }

        /* the element being repeated: a word character or any character */
        if (wholeWords) {
            buf.append(WORD_CHARS_EXPR);
        } else {
            buf.append('.');
        }

        /* the quantifier */
        switch (minCount) {
        case 0:
            assert unbounded;
            buf.append('*');
            break;
        case 1:
            if (unbounded) {
                buf.append('+');
            }
            break;
        default:
            if (wholeWords) {
                buf.append('{').append(minCount);
                if (unbounded) {
                    buf.append(',');
                }
                buf.append('}');
            } else {
                /* one dot is already there, add the remaining ones */
                for (int i = 1; i < minCount; i++) {
                    buf.append('.');
                }
                if (unbounded) {
                    buf.append('+');
                }
            }
            break;
        }
        if (unbounded && middle) {
            buf.append('?');    //use reluctant variant of the quantifier
        }
        return true;
    }

    /**
     * Translates the given simple pattern (or several patterns) to a single
     * regular expression.
     * <p>
     * Patterns in the list are separated by commas and/or spaces; the
     * resulting expression joins translations of the individual patterns
     * with {@code '|'}. A separator preceded by a backslash is taken
     * as a literal character.
     *
     * @param  simplePatternList  pattern list to be translated
     * @return  regular expression corresponding to the simple pattern
     *          (or to the list of simple patterns)
     */
    static String makeMultiRegexp(String simplePatternList) {
        if (simplePatternList.length() == 0) {              //trivial case
            return simplePatternList;
        }

        if (TRIVIAL_PATTERN_LIST.matcher(simplePatternList).matches()) {
            return simplePatternList;                       //trivial case
        }

        StringBuilder buf = new StringBuilder(simplePatternList.length() + 16);
        boolean lastWasSeparator = false;
        boolean quoted = false;         //previous character was a quoting '\\'
        boolean starPresent = false;    //a '*' is waiting to be emitted
        for (char c : simplePatternList.toCharArray()) {
            if (quoted) {
                appendEscaped(buf, c);
                quoted = false;
            } else if ((c == ',') || (c == ' ')) {
                if (starPresent) {
                    buf.append('.').append('*');
                    starPresent = false;
                }
                lastWasSeparator = true;
            } else {
                /* first character of a pattern other than the first one */
                if (lastWasSeparator && (buf.length() != 0)) {
                    buf.append('|');
                }
                if (c == '?') {
                    buf.append('.');
                } else if (c == '*') {
                    starPresent = true;     //collapses consecutive stars
                } else {
                    if (starPresent) {
                        buf.append('.').append('*');
                        starPresent = false;
                    }
                    if (c == '\\') {
                        quoted = true;
                    } else {
                        appendEscaped(buf, c);
                    }
                }
                lastWasSeparator = false;
            }
        }
        if (quoted) {
            /* trailing '\\' quotes nothing - take it as a literal backslash */
            buf.append('\\').append('\\');
        } else if (starPresent) {
            buf.append('.').append('*');
        }
        return buf.toString();
    }

    /**
     * Checks whether the given character can be put to a regular expression
     * as it is, i.e. without being escaped by a backslash.
     * Such characters are the space, ASCII digits, ASCII letters
     * and all non-ASCII characters.
     *
     * @param  c  character to be checked
     * @return  {@code true} if the character needs no escaping,
     *          {@code false} otherwise
     */
    private static boolean isSimpleCharacter(char c) {
        return (c == ' ')                                   //space
                || (c > 0x7f)                               //non-ASCII
                || ((c >= '0') && (c <= '9'))               //'0' .. '9'
                || ((c >= 'a') && (c <= 'z'))               //'a' .. 'z'
                || ((c >= 'A') && (c <= 'Z'));              //'A' .. 'Z'
    }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.