Example usage for java.util.regex Pattern UNICODE_CASE

Introduction

On this page you can find example usages of java.util.regex Pattern UNICODE_CASE.

Prototype

int UNICODE_CASE

To view the source code for java.util.regex Pattern UNICODE_CASE, click the Source Link.

Document

Enables Unicode-aware case folding.

Usage

From source file:org.codice.git.RepositoryHandler.java

/**
 * Builds the pattern used to search for the given dirty word.
 *
 * A word that starts with {@code RepositoryHandler.REGEX_PREFIX} is taken as a regular
 * expression (the prefix is stripped); any other word is quoted so it matches literally.
 * The resulting expression is inserted into {@code RepositoryHandler.REGEX_PATTERN} and
 * compiled case-insensitively with Unicode-aware case folding.
 *
 * @param dirtyWord
 *        the word, or prefixed regular expression, to search for.
 * @return the compiled pattern.
 */
protected Pattern getPatternFor(String dirtyWord) {
    final String expression;

    if (dirtyWord.startsWith(RepositoryHandler.REGEX_PREFIX)) {
        // explicit regular expression: use it as is, minus the marker prefix
        expression = dirtyWord.substring(RepositoryHandler.REGEX_PREFIX.length());
    } else {
        // plain word: neutralize any regex meta characters it may contain
        expression = Pattern.quote(dirtyWord);
    }

    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    final String regex = String.format(RepositoryHandler.REGEX_PATTERN, expression);
    return Pattern.compile(regex, flags);
}

From source file:de.undercouch.bson4jackson.BsonParserTest.java

/**
 * Round-trips a BSON object holding a timestamp, a symbol, an object id and a regex
 * through parseBsonObject and checks that each value comes back with the same content.
 */
@Test
public void parseComplex() throws Exception {
    // the regex carries several flags so the flag round trip is exercised too
    final int regexFlags = Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE
            | Pattern.UNICODE_CASE;
    Pattern expectedRegex = Pattern.compile(".*", regexFlags);

    BSONObject source = new BasicBSONObject();
    source.put("Timestamp", new BSONTimestamp(0xAABB, 0xCCDD));
    source.put("Symbol", new Symbol("Test"));
    source.put("ObjectId", new org.bson.types.ObjectId(Integer.MAX_VALUE, -2, Integer.MIN_VALUE));
    source.put("Regex", expectedRegex);

    Map<?, ?> parsed = parseBsonObject(source);

    assertEquals(new Timestamp(0xAABB, 0xCCDD), parsed.get("Timestamp"));
    assertEquals(new de.undercouch.bson4jackson.types.Symbol("Test"), parsed.get("Symbol"));

    ObjectId parsedId = (ObjectId) parsed.get("ObjectId");
    assertEquals(Integer.MAX_VALUE, parsedId.getTime());
    assertEquals(-2, parsedId.getMachine());
    assertEquals(Integer.MIN_VALUE, parsedId.getInc());

    Pattern actualRegex = (Pattern) parsed.get("Regex");
    assertEquals(expectedRegex.flags(), actualRegex.flags());
    assertEquals(expectedRegex.pattern(), actualRegex.pattern());
}

From source file:org.etudes.util.HtmlHelper.java

/**
 * Strips conditional comments that were damaged by IE / Tiny from the data: everything from
 * "&lt;!--[if" up to the nearest following "&lt;! [endif] &gt;" is removed, and afterwards every
 * literal "&lt;--&gt;" is removed as well.
 *
 * Data that is null, or that does not contain the exact text "&lt;! [endif] &gt;", is returned
 * untouched.
 *
 * @param data
 *        the html data.
 * @return The cleaned up data.
 */
public static String stripDamagedComments(String data) {
    // nothing to do for null, or when the tell-tale end marker is absent
    if (data == null || !data.contains("<! [endif] >")) {
        return data;
    }

    // DOTALL lets "." span line terminators; the reluctant "*?" stops at the first end marker so
    // text between two separate comments survives
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
    Pattern damagedComment = Pattern.compile("<!--\\[if.*?<! \\[endif\\] >", flags);

    String withoutComments = damagedComment.matcher(data).replaceAll("");

    // finally drop the leftover bad comment end
    return withoutComments.replace("<-->", "");
}

From source file:edu.temple.cis3238.wiki.utils.StringUtils.java

/**
 * Removes everything matched by REGEX_HTML_MARKUP_CHARS from the string.
 *
 * This is best effort: if the pattern cannot be compiled or the replacement fails, the
 * exception is swallowed and an empty string is handed to toS instead.
 *
 * @param val
 *        the text to clean.
 * @return the result of toS applied to the cleaned text.
 */
public static String removeHtmlMarkups(String val) {
    final int flags = Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    String stripped = "";

    try {
        Matcher markup = Pattern.compile(REGEX_HTML_MARKUP_CHARS, flags).matcher(val);
        try {
            stripped = markup.replaceAll("");
        } catch (IllegalArgumentException | IndexOutOfBoundsException ignored) {
            // deliberately ignored: fall back to the empty string
        }
    } catch (PatternSyntaxException ignored) {
        // deliberately ignored: an invalid markup regex yields the empty string
    }

    return toS(stripped);
}

From source file:org.etudes.util.HtmlHelper.java

/**
 * Strips the html-encoded Word "Font Definitions" comments produced by Tiny from the data:
 * everything from "&amp;lt;!--  /* Font Definitions *&#47;" up to the nearest following
 * "--&amp;gt;" is removed.
 *
 * Data that is null, or that does not contain the exact opening text, is returned untouched.
 *
 * @param data
 *        the html data.
 * @return The cleaned up data.
 */
public static String stripEncodedFontDefinitionComments(String data) {
    // nothing to do for null, or when the opening marker is absent
    if (data == null || !data.contains("&lt;!--  /* Font Definitions */")) {
        return data;
    }

    // DOTALL lets "." span line terminators; the reluctant "*?" stops at the first "--&gt;" so
    // text between two separate comments survives
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
    Pattern fontComment = Pattern.compile("&lt;!--  /\\* Font Definitions \\*/.*?--&gt;", flags);

    return fontComment.matcher(data).replaceAll("");
}

From source file:org.webguitoolkit.ui.controls.table.DefaultTableModel.java

/**
 * pattern for insensitive search/*w  w w  .j  a va 2  s  . c  om*/
 */
protected Pattern compilePattern(String pattern) {

    if (!pattern.startsWith(ISREGULAREXPRESSION)) {
        pattern = escapeRegexLiterals(pattern);
        pattern = pattern.replaceAll("\\*", ".*");
        pattern = pattern.replaceAll("\\?", ".");
        if (!pattern.endsWith("*"))
            pattern += ".*";

    } else {
        pattern = regularExpressionFilter(pattern);
    }
    return Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL);
}

From source file:org.wso2.carbon.identity.common.util.validation.ValidationUtils.java

/**
 * Checks whether the whole file name matches the file name regular expression.
 *
 * The configured expression is currently a blank placeholder, so DEFAULT_FILE_NAME_REGEX is
 * used whenever isBlank reports it as blank.
 *
 * @param fileName
 *        the file name to check.
 * @return true if the entire file name matches the expression.
 */
public static boolean isValidFileName(String fileName) {
    // read filename regex from identity.yaml
    String configuredRegEx = "";

    final String effectiveRegEx = isBlank(configuredRegEx) ? DEFAULT_FILE_NAME_REGEX : configuredRegEx;

    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.COMMENTS;
    return Pattern.compile(effectiveRegEx, flags).matcher(fileName).matches();
}

From source file:org.openremote.controller.protocol.telnet.TelnetCommand.java

/**
 * Reads the sensor value for this telnet command.
 *
 * Calls send(true) first, then takes the response, optionally narrows it with the response
 * filter regular expression (the whole response must match; the configured group is used),
 * and finally converts it according to the sensor type:
 * <ul>
 * <li>SWITCH: "on" if the response, after replacing "1" / "on" with "true", parses as boolean
 * true; otherwise "off".</li>
 * <li>LEVEL, RANGE: the response itself if it parses as an integer; otherwise the default
 * status is kept.</li>
 * <li>anything else: the response itself.</li>
 * </ul>
 * If the filtered response is empty, statusDefault is returned.
 *
 * @param sensorType
 *        the type of sensor being read; decides how the response is converted.
 * @param statusMap
 *        not used by this implementation.
 * @return the converted response, or statusDefault when nothing usable was read.
 */
@Override
public String read(EnumSensorType sensorType, Map<String, String> statusMap) {
    String readResponse = statusDefault;
    String filteredResponse = "";
    send(true);

    try {
        if ("".equals(getResponseFilter()) || getResponseFilter() == null) {
            // no filter configured: use the raw response
            filteredResponse = getResponse();
        } else {
            Pattern p = Pattern.compile(getResponseFilter(), Pattern.CANON_EQ | Pattern.UNICODE_CASE);
            Matcher m = p.matcher(getResponse());
            if (m.matches()) {
                String matchedGroup = m.group(getResponseFilterGroup());
                if (matchedGroup != null) {
                    filteredResponse = matchedGroup;
                }
            } else {
                logger.error("Telnet Read Status: No Match using Regex: '" + getResponseFilter()
                        + "' on response from command '" + getCommand() + "'");
            }
        }
    } catch (PatternSyntaxException e) {
        logger.error("Telnet Read Status: Invalid filter expression", e);
    }

    if (!"".equals(filteredResponse)) {
        switch (sensorType) {
        // Switch: on or off response needed
        case SWITCH: {
            // Strings are immutable: the replaced value must be captured, otherwise "1" and "on"
            // are never recognized as an "on" state
            String normalized = filteredResponse.replaceAll("1|on", "true");
            readResponse = Boolean.parseBoolean(normalized) ? "on" : "off";
            break;
        }
        case LEVEL:
        case RANGE:
            try {
                // parse only to validate that the response is an integer
                Integer.parseInt(filteredResponse);
                readResponse = filteredResponse;
            } catch (NumberFormatException e) {
                // parseInt signals bad input with NumberFormatException; keep the default status
                logger.info("Can't convert filteredResponse to type Integer: " + e);
            }
            break;
        default:
            readResponse = filteredResponse;
        }
    }

    return readResponse;
}

From source file:org.etudes.util.HtmlHelper.java

/**
 * Strips the html-encoded Word "Style Definitions" comments produced by Tiny from the data:
 * everything from "&amp;lt;!-- /* Style Definitions *&#47;" up to the nearest following
 * "--&amp;gt;" is removed.
 *
 * Data that is null, or that does not contain the exact opening text, is returned untouched.
 *
 * @param data
 *        the html data.
 * @return The cleaned up data.
 */
public static String stripEncodedStyleDefinitionComments(String data) {
    // nothing to do for null, or when the opening marker is absent
    if (data == null || !data.contains("&lt;!-- /* Style Definitions */")) {
        return data;
    }

    // DOTALL lets "." span line terminators; the reluctant "*?" stops at the first "--&gt;" so
    // text between two separate comments survives
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
    Pattern styleComment = Pattern.compile("&lt;!-- /\\* Style Definitions \\*/.*?--&gt;", flags);

    return styleComment.matcher(data).replaceAll("");
}

From source file:org.etudes.util.HtmlHelper.java

/**
 * Strips tags that are only valid in headers (link, meta, title, base, style) from the data.
 *
 * Only a tag that has whitespace after its name is matched, and it is removed up to and
 * including the next "&gt;"; matching ignores case. Null data is returned as is.
 *
 * @param data
 *        the html data.
 * @return The cleaned up data.
 */
public static String stripHeaderTags(String data) {
    if (data == null) {
        return data;
    }

    // DOTALL lets the tag's attributes span line terminators; the reluctant "*?" ends the match
    // at the first closing bracket
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
    Pattern headerTag = Pattern.compile("<(link|meta|title|base|style)\\s+.*?(/*>)", flags);

    return headerTag.matcher(data).replaceAll("");
}

Example usage for java.util.regex Pattern UNICODE_CASE

Introduction

Prototype

Document

Usage