Example usage for java.util.regex Pattern CANON_EQ

List of usage examples for java.util.regex Pattern CANON_EQ

Introduction

On this page you can find example usages of java.util.regex.Pattern.CANON_EQ.

Prototype

int CANON_EQ

To view the source code for java.util.regex.Pattern.CANON_EQ, click the Source Link.

Document

Enables canonical equivalence.

Usage

From source file:MainClass.java

/**
 * Demonstrates {@link Pattern#CANON_EQ}: compiles the word "egal" spelled with a
 * precomposed e-acute (U+00E9) and prints, for each candidate spelling, whether
 * the entire candidate matches the pattern.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final String pattStr = "\u00e9gal"; // precomposed e-acute followed by "gal"
    final String[] input = {
            "\u00e9gal", // same precomposed spelling as the pattern
            "e\u0301gal", // e + U+0301 combining acute accent
            "e\u02cagal", // e + U+02CA modifier letter acute accent
            "e'gal", // e + apostrophe
            "e\u00b4gal", // e + U+00B4 Latin-1 acute accent
    };
    final Pattern pattern = Pattern.compile(pattStr, Pattern.CANON_EQ);
    for (String candidate : input) {
        // matches() requires the whole candidate to match, not just a substring.
        final String verdict = pattern.matcher(candidate).matches()
                ? " matches input "
                : " does not match input ";
        System.out.println(pattStr + verdict + candidate);
    }
}

From source file:BGrep.java

/**
 * Command-line entry point of a grep-like tool that searches files for a
 * regular expression and prints every match as
 * {@code filename:charPosition: matchedText}.
 *
 * <p>Arguments, as parsed below: {@code [-e encoding] [-i] [-s] pattern file...}
 * <ul>
 * <li>{@code -e name} - charset used to decode the files (default UTF-8)</li>
 * <li>{@code -i} - case-insensitive matching</li>
 * <li>{@code -s} - adds {@code UNICODE_CASE} and {@code CANON_EQ}</li>
 * </ul>
 * Per-file I/O errors are printed and the file is skipped. A bad encoding name,
 * a bad pattern, or too few arguments is reported on standard error or through
 * {@code usage()}.
 *
 * @param args the command-line arguments described above
 */
public static void main(String[] args) {
    String encodingName = "UTF-8"; // Default to UTF-8 encoding
    int flags = Pattern.MULTILINE; // Default regexp flags

    try { // Fatal exceptions are handled after this try block
        // First, process any options. startsWith() is used instead of
        // charAt(0) so that an empty-string argument is not an error: it is
        // simply not an option and is treated as the pattern.
        int nextarg = 0;
        while (args[nextarg].startsWith("-")) {
            String option = args[nextarg++];
            if (option.equals("-e")) {
                encodingName = args[nextarg++];
            } else if (option.equals("-i")) { // case-insensitive matching
                flags |= Pattern.CASE_INSENSITIVE;
            } else if (option.equals("-s")) { // Strict Unicode processing
                flags |= Pattern.UNICODE_CASE; // case-insensitive Unicode
                flags |= Pattern.CANON_EQ; // canonicalize Unicode
            } else {
                System.err.println("Unknown option: " + option);
                usage();
            }
        }

        // Get the Charset for converting bytes to chars
        Charset charset = Charset.forName(encodingName);

        // Next argument must be a regexp. Compile it to a Pattern object
        Pattern pattern = Pattern.compile(args[nextarg++], flags);

        // Require that at least one file is specified
        if (nextarg == args.length)
            usage();

        // Loop through each of the specified filenames
        while (nextarg < args.length) {
            String filename = args[nextarg++];
            CharBuffer chars; // This will hold complete text of the file
            try { // Handle per-file errors locally
                // Open a FileChannel to the named file
                FileInputStream stream = new FileInputStream(filename);
                try {
                    FileChannel f = stream.getChannel();

                    // Memory-map the file into one big ByteBuffer. This is
                    // easy but may be somewhat inefficient for short files.
                    ByteBuffer bytes = f.map(FileChannel.MapMode.READ_ONLY, 0, f.size());

                    // Decode the entire ByteBuffer into one big CharBuffer
                    chars = charset.decode(bytes);
                } finally {
                    // Close the stream (which closes the channel, too) even
                    // when mapping fails, so the file handle never leaks.
                    stream.close();
                }
            } catch (IOException e) { // File not found or other problem
                System.err.println(e); // Print error message
                continue; // and move on to the next file
            }

            // This is the basic regexp loop for finding all matches in a
            // CharSequence. Note that CharBuffer implements CharSequence.
            // A Matcher holds state for a given Pattern and text.
            Matcher matcher = pattern.matcher(chars);
            while (matcher.find()) { // While there are more matches
                // Print out details of the match
                System.out.println(filename + ":" + // file name
                        matcher.start() + ": " + // character pos
                        matcher.group()); // matching text
            }
        }
    }
    // These are the things that can go wrong in the code above
    catch (java.nio.charset.IllegalCharsetNameException e) { // Malformed encoding name
        System.err.println("Unknown encoding: " + encodingName);
    } catch (UnsupportedCharsetException e) { // Bad encoding name
        System.err.println("Unknown encoding: " + encodingName);
    } catch (PatternSyntaxException e) { // Bad pattern
        System.err.println("Syntax error in search pattern:\n" + e.getMessage());
    } catch (ArrayIndexOutOfBoundsException e) { // Wrong number of arguments
        usage();
    }
}

From source file:CanonEqDemo.java

/**
 * Compiles "egal" (written with a precomposed e-acute, U+00E9) using
 * {@link Pattern#CANON_EQ} and prints one line per test string saying whether
 * that string matches the pattern in full.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
    String pattStr = "\u00e9gal"; // e-acute (U+00E9) + "gal"
    String[] input = {
            "\u00e9gal", // e-acute + "gal", same as the pattern
            "e\u0301gal", // e + "Combining acute accent"
            "e\u02cagal", // e + "modifier letter acute accent"
            "e'gal", // e + single quote
            "e\u00b4gal", // e + Latin-1 "acute"
    };
    Pattern pattern = Pattern.compile(pattStr, Pattern.CANON_EQ);
    for (String text : input) {
        boolean wholeInputMatches = pattern.matcher(text).matches();
        if (!wholeInputMatches) {
            System.out.println(pattStr + " does not match input " + text);
            continue;
        }
        System.out.println(pattStr + " matches input " + text);
    }
}

From source file:Normalization.TextNormalization.java

/**
 * Characters stripped by {@link #removeEmojiFromString(String)}: the code point
 * ranges U+1F000-U+1F3FF, U+1F400-U+1F7FF and U+2600-U+27FF. Compiled once
 * because a {@link Pattern} is immutable and reusable across calls.
 */
private static final Pattern UNICODE_OUTLIERS = Pattern.compile(
        "[\ud83c\udc00-\ud83c\udfff]|[\ud83d\udc00-\ud83d\udfff]|[\u2600-\u27ff]",
        Pattern.UNICODE_CASE | Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE);

/**
 * Returns {@code content} with every character in the ranges U+1F000-U+1F3FF,
 * U+1F400-U+1F7FF and U+2600-U+27FF removed.
 *
 * @param content the text to clean; must not be {@code null}
 * @return the text without the characters listed above
 * @throws NullPointerException if {@code content} is {@code null}
 */
public String removeEmojiFromString(String content) {
    // Round-trip through UTF-8 bytes, as the original implementation did.
    // Encoding replaces malformed sequences (such as an unpaired surrogate)
    // with the charset's replacement, so the matcher sees well-formed text.
    // Using the Charset constant means no checked exception has to be swallowed.
    String utf8tweet = new String(
            content.getBytes(java.nio.charset.StandardCharsets.UTF_8),
            java.nio.charset.StandardCharsets.UTF_8);

    return UNICODE_OUTLIERS.matcher(utf8tweet).replaceAll("");
}

From source file:nz.net.orcon.kanban.automation.actions.RegexAction.java

/**
 * Returns the text of one capturing group from one particular match of a
 * regular expression.
 *
 * @param text the text to search; {@code null} is treated as the empty string
 * @param expressionString the regular expression; must not be {@code null}
 * @param match which match to use, counting from 1 in the order found
 * @param group which capturing group of that match to return (0 is the whole match)
 * @param options {@code |}-separated names of {@link Pattern} flag constants
 *        (for example {@code "CASE_INSENSITIVE|DOTALL"}); names are
 *        case-insensitive, surrounding whitespace is ignored, unrecognised
 *        names are ignored, and {@code null} means no flags
 * @return the requested group's text; the empty string when there is no such
 *         match or the match has no such group; {@code null} when the group
 *         exists but did not take part in the match
 * @throws IllegalArgumentException if {@code expressionString} is {@code null}
 * @throws IOException declared for callers; no statement in this method throws it
 */
public String extract(String text, String expressionString, int match, int group, String options)
        throws IOException {

    if (text == null) {
        text = "";
    }

    if (expressionString == null) {
        throw new IllegalArgumentException(
                "No Regular Expression has been provided to carry out this operation.");
    }

    int optionsInEffect = 0;
    if (options != null) {
        // Locale.ROOT keeps the upper-casing locale-independent. With the
        // default locale, Turkish casing rules turn "i" into a dotted capital
        // I, so "case_insensitive" would never be recognised.
        for (String option : options.toUpperCase(java.util.Locale.ROOT).split("\\|")) {
            optionsInEffect |= flagForOption(option.trim());
        }
    }

    Matcher matches = Pattern.compile(expressionString, optionsInEffect).matcher(text);

    int matchIndex = 1;
    while (matches.find()) {
        if (matchIndex == match) {
            // Group 0 is the whole match; groups 1..groupCount() are the captures.
            if (group < 0 || group > matches.groupCount()) {
                return "";
            }
            return matches.group(group);
        }
        matchIndex++;
    }

    return "";
}

/** Maps an upper-case flag name to its {@link Pattern} constant, or 0 if unrecognised. */
private static int flagForOption(String option) {
    if (option.equals("CANON_EQ")) {
        return Pattern.CANON_EQ;
    }
    if (option.equals("CASE_INSENSITIVE")) {
        return Pattern.CASE_INSENSITIVE;
    }
    if (option.equals("COMMENTS")) {
        return Pattern.COMMENTS;
    }
    if (option.equals("DOTALL")) {
        return Pattern.DOTALL;
    }
    if (option.equals("LITERAL")) {
        return Pattern.LITERAL;
    }
    if (option.equals("MULTILINE")) {
        return Pattern.MULTILINE;
    }
    if (option.equals("UNICODE_CASE")) {
        return Pattern.UNICODE_CASE;
    }
    if (option.equals("UNIX_LINES")) {
        return Pattern.UNIX_LINES;
    }
    return 0;
}

From source file:org.opennms.netmgt.provision.adapters.link.config.linkadapter.LinkPattern.java

/**
 * <p>setPattern</p>/*from   w  w  w. ja  v a2 s  .c o m*/
 *
 * @param pattern a {@link java.lang.String} object.
 */
public void setPattern(final String pattern) {
    if (pattern != null) {
        m_compiledPattern = Pattern.compile(pattern, Pattern.CANON_EQ | Pattern.DOTALL);
        m_pattern = pattern;
    }
}

From source file:com.github.rwitzel.streamflyer.support.ProcessEndOfStreamTest.java

/**
 * Copies {@code input} to {@code output}, reading through a
 * {@code ModifyingReader} whose {@code RegexModifier} is configured with a
 * fixed regex (an optional http/https host prefix followed by "something")
 * and the replacement {@code "$1anything"}.
 *
 * @param input source stream, decoded with {@code encoding}
 * @param output target stream, encoded with {@code encoding}
 * @param encoding name of the charset used for both streams
 * @param flush whether to flush the writer after copying
 * @return the value returned by {@code IOUtils.copy} (presumably the number of
 *         characters copied - confirm against the IOUtils version in use)
 * @throws IOException if copying or flushing fails
 */
protected long rewriteContent(InputStream input, OutputStream output, String encoding, boolean flush)
        throws IOException {

    final Charset cs = Charset.forName(encoding);

    final String oldPath = "something";
    final String newPath = "anything";
    final String regex = "((https?://)([^/]+/))?(" + oldPath + ")";
    final String replacement = "$1" + newPath;
    final int regexFlags = Pattern.CASE_INSENSITIVE | Pattern.CANON_EQ;

    // A FastRegexModifier with the same three arguments was tried here
    // previously; RegexModifier is the one in use.
    final RegexModifier modifier = new RegexModifier(regex, regexFlags, replacement);

    final Reader reader = new ModifyingReader(new InputStreamReader(input, cs), modifier);
    final Writer writer = new OutputStreamWriter(output, cs);

    final int copied = IOUtils.copy(reader, writer);

    // Flushing is optional: the caller decides via the flag.
    if (flush) {
        writer.flush();
    }

    return copied;
}

From source file:pl.otros.logview.gui.message.pattern.PropertyPatternMessageColorizer.java

/**
 * Loads this colorizer's settings from a properties stream read as UTF-8:
 * the regular expression, one boolean property per {@link Pattern} flag
 * (each defaulting to {@code false}), and the name, description, test
 * message and version.
 *
 * @param in stream containing the properties
 * @throws ConfigurationException if the properties cannot be loaded
 */
public void init(InputStream in) throws ConfigurationException {
    propertiesConfiguration = new PropertiesConfiguration();
    propertiesConfiguration.setDelimiterParsingDisabled(true);
    propertiesConfiguration.load(in, "UTF-8");
    configuration = new DataConfiguration(propertiesConfiguration);
    configuration.setDelimiterParsingDisabled(true);

    final String regex = configuration.getString(PROP_PATTERN);

    // Each flag is switched on only when its property is explicitly true.
    int flags = 0;
    if (configuration.getBoolean(PROP_PATTERN_CANON_EQ, false)) {
        flags |= Pattern.CANON_EQ;
    }
    if (configuration.getBoolean(PROP_PATTERN_CASE_INSENSITIVE, false)) {
        flags |= Pattern.CASE_INSENSITIVE;
    }
    if (configuration.getBoolean(PROP_PATTERN_COMMENTS, false)) {
        flags |= Pattern.COMMENTS;
    }
    if (configuration.getBoolean(PROP_PATTERN_DOTALL, false)) {
        flags |= Pattern.DOTALL;
    }
    if (configuration.getBoolean(PROP_PATTERN_LITERAL, false)) {
        flags |= Pattern.LITERAL;
    }
    if (configuration.getBoolean(PROP_PATTERN_MULTILINE, false)) {
        flags |= Pattern.MULTILINE;
    }
    if (configuration.getBoolean(PROP_PATTERN_UNICODE_CASE, false)) {
        flags |= Pattern.UNICODE_CASE;
    }
    if (configuration.getBoolean(PROP_PATTERN_UNIX_LINES, false)) {
        flags |= Pattern.UNIX_LINES;
    }

    pattern = Pattern.compile(regex, flags);
    groupCount = countGroups(pattern);

    name = configuration.getString(PROP_NAME, "NAME NOT SET!");
    description = configuration.getString(PROP_DESCRIPTION, "DESCRIPTION NOT SET!");
    testMessage = configuration.getString(PROP_TEST_MESSAGE, "");
    version = configuration.getInt(PROP_VERSION, 1);
}

From source file:com.google.code.configprocessor.processing.ModifyAction.java

/**
 * Converts the configured flag names into a {@link Pattern} flag bit mask.
 * The names come from {@code getFlags()}, or from
 * {@code DEFAULT_PATTERN_FLAGS} when that is {@code null}, and are split on
 * {@code PATTERN_FLAG_SEPARATOR}.
 *
 * @return the bitwise OR of the {@link Pattern} constants named by the flags
 * @throws IllegalArgumentException if a flag name is not recognised
 */
protected int parseFlags() {
    String flagsToTest;
    if (getFlags() == null) {
        flagsToTest = DEFAULT_PATTERN_FLAGS;
    } else {
        flagsToTest = getFlags();
    }

    int flagsToUse = 0;
    for (String flag : StringUtils.split(flagsToTest, PATTERN_FLAG_SEPARATOR)) {
        flagsToUse |= toPatternFlag(flag);
    }
    return flagsToUse;
}

/** Returns the {@link Pattern} constant with the given name, or throws if there is none. */
private static int toPatternFlag(String flag) {
    if ("UNIX_LINES".equals(flag)) {
        return Pattern.UNIX_LINES;
    }
    if ("CASE_INSENSITIVE".equals(flag)) {
        return Pattern.CASE_INSENSITIVE;
    }
    if ("COMMENTS".equals(flag)) {
        return Pattern.COMMENTS;
    }
    if ("MULTILINE".equals(flag)) {
        return Pattern.MULTILINE;
    }
    if ("LITERAL".equals(flag)) {
        return Pattern.LITERAL;
    }
    if ("DOTALL".equals(flag)) {
        return Pattern.DOTALL;
    }
    if ("UNICODE_CASE".equals(flag)) {
        return Pattern.UNICODE_CASE;
    }
    if ("CANON_EQ".equals(flag)) {
        return Pattern.CANON_EQ;
    }
    throw new IllegalArgumentException("Unknown flag: " + flag);
}

From source file:org.talend.core.model.utils.ContextParameterUtils.java

/**
 * ggu Comment method "getVariableFromCode".
 *
 * Looks for the first occurrence of the new-style context prefix in
 * {@code code} and returns the variable name that follows it. Only the first
 * occurrence is examined.
 *
 * @param code the script code to inspect; may be {@code null}
 * @return the variable name, or {@code null} when {@code code} is
 *         {@code null}, the prefix does not occur, or the text after it is
 *         not a valid parameter name
 */
public static String getVariableFromCode(String code) {
    if (code == null) {
        return null;
    }

    final String wordBoundary = "\\b"; //$NON-NLS-1$
    final String lazyCapture = "(.+?)"; //$NON-NLS-1$
    final String expression = wordBoundary + replaceCharForRegex(JAVA_NEW_CONTEXT_PREFIX) + lazyCapture
            + wordBoundary;

    final Matcher regexMatcher = Pattern.compile(expression, Pattern.CANON_EQ).matcher(code);
    if (!regexMatcher.find()) {
        return null;
    }

    try {
        final String candidate = regexMatcher.group(1);
        if (candidate != null && ContextParameterUtils.isValidParameterName(candidate)) {
            return candidate;
        }
    } catch (RuntimeException re) {
        // not match
    }
    return null;
}