com.yahoo.flowetl.core.util.RegexUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.yahoo.flowetl.core.util.RegexUtil.java

Source

/*******************************************************
 *                                                     *
 * Copyright (C) 2011 Yahoo! Inc. All Rights Reserved. *
 *                                                     *
 *                Licensed under the New               *
 *                 BSD License. See the                *
 *              accompanying LICENSE file              *
 *              for the specific language              *
 *              governing permissions and              *
 *                limitations under the                *
 *                       License.                      *
 *******************************************************/
package com.yahoo.flowetl.core.util;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.commons.lang.StringUtils;

/**
 * Provides useful regex functions.
 * 
 * @author harlowja
 */
public class RegexUtil {

    /** The regex pattern extractor. */
    private static final Pattern patExtractor = Pattern.compile("/(.*?)/([\\w]*)", Pattern.UNICODE_CASE);

    /** A cache of previously created patterns */
    private static final Map<String, Pattern> compiledPats = new ConcurrentHashMap<String, Pattern>();

    private RegexUtil() {
        // a util class...
    }

    /**
     * Gets the pattern for the given string by providing the rules to do
     * extraction. This turns on caching. Ensure that you are ok with caching
     * since this could eat up memory.
     */
    public static Pattern getPattern(String str) throws PatternSyntaxException {
        return getPattern(str, true);
    }

    /**
     * Gets the pattern for the given string by providing the rules to do
     * extraction.
     * 
     * This is similar to how php does regex to match you provide in the format
     * /REGEX/options where options currently are "i" for case insensitive and
     * "u" for unicode and "m" for multiline and "s" for dotall and the value
     * inside the // is the regex to use
     * 
     * @param str
     *            the string to parsed the pattern out of
     * 
     * @param cache
     *            whether to cache the compiled pattern
     * 
     * @return the pattern
     * 
     * @throws PatternSyntaxException
     * 
     *             the pattern syntax exception if it has wrong syntax
     */
    public static Pattern getPattern(String str, boolean cache) throws PatternSyntaxException {
        if (str == null) {
            return null;
        }
        // see if we made it before...
        Pattern p = compiledPats.get(str);
        if (p != null) {
            return p;
        }
        Matcher mat = patExtractor.matcher(str);
        if (mat.matches() == false) {
            throw new PatternSyntaxException("Invalid syntax provided", str, -1);
        }
        String regex = mat.group(1);
        String opts = mat.group(2);
        int optsVal = 0;
        if (StringUtils.contains(opts, "i")) {
            optsVal |= Pattern.CASE_INSENSITIVE;
        }
        if (StringUtils.contains(opts, "u")) {
            optsVal |= Pattern.UNICODE_CASE;
        }
        if (StringUtils.contains(opts, "m")) {
            optsVal |= Pattern.MULTILINE;
        }
        if (StringUtils.contains(opts, "s")) {
            optsVal |= Pattern.DOTALL;
        }
        // compile and store it
        p = Pattern.compile(regex, optsVal);
        if (cache) {
            compiledPats.put(str, p);
        }
        return p;
    }
}