Example usage for java.util.regex Pattern CASE_INSENSITIVE

List of usage examples for java.util.regex Pattern CASE_INSENSITIVE

Introduction

On this page you can find example usages of java.util.regex Pattern CASE_INSENSITIVE.

Prototype

int CASE_INSENSITIVE

To view the source code for java.util.regex Pattern CASE_INSENSITIVE, click the Source Link.

Document

Enables case-insensitive matching.

Usage

From source file:de.dfki.km.perspecting.obie.model.Document.java

/**
 * Gets the pure plain text out of an html text while keeping every character
 * at its original position. Each character of removed markup is overwritten
 * with a space; line terminators inside the markup are kept. The result
 * therefore has the same length as the input, which makes absolute
 * positioning possible.
 * <p>
 * Markup is blanked in three passes: first the head, then all remaining
 * script elements (including their content) and finally all remaining html
 * tags.
 *
 * @param text
 *            content of the html document as text
 * @return text where all html was replaced by spaces
 */
private String extractPlainTextFromHtml(String text) {
    // DOTALL lets '.' cross line breaks, so markup that spans several lines
    // is matched as well. This includes ordinary tags whose attributes are
    // wrapped onto the next line.
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
    final Pattern[] patterns = {
            // Greedy on purpose as in the original: runs up to the last "/head>".
            Pattern.compile("<head.*/head>", flags),
            // .*? makes it non greedy -> take the shortest match
            Pattern.compile("<script.*?/script>", flags),
            Pattern.compile("<.+?>", flags) };
    StringBuilder s = new StringBuilder(text);

    // Go for all patterns.
    for (Pattern p : patterns) {
        Matcher matcher = p.matcher(s);

        // As long as the matcher finds another occurrence of the pattern we
        // blank it out in place. Overwriting char by char (instead of a
        // regex replace on the match) keeps the length unchanged even when
        // the markup contains surrogate pairs, so later matches and the
        // caller's offsets stay aligned.
        while (matcher.find()) {
            for (int i = matcher.start(); i < matcher.end(); i++) {
                char c = s.charAt(i);
                // Same set of line terminators that '.' skips by default.
                boolean lineTerminator = c == '\n' || c == '\r' || c == '\u0085' || c == '\u2028'
                        || c == '\u2029';
                if (!lineTerminator) {
                    s.setCharAt(i, ' ');
                }
            }
        }
    }
    return s.toString();
}

From source file:com.github.ibole.infrastructure.persistence.db.mybatis.pagination.SqlHelper.java

/**
 * Removes every case-insensitive match of {@code XSQL_ORDER_BY_REGEX} from
 * the given text and hands the remainder to {@code removeOrders}.
 *
 * @param string the text to clean; checked with {@code Preconditions.checkNotNull}
 * @return the result of {@code removeOrders} applied to the stripped text
 */
public static String removeXsqlBuilderOrders(String string) {
    Preconditions.checkNotNull(string);
    // Replacing all matches with the empty string is the same as looping over
    // find() with an empty appendReplacement followed by appendTail.
    String withoutXsqlOrders = Pattern.compile(XSQL_ORDER_BY_REGEX, Pattern.CASE_INSENSITIVE)
            .matcher(string)
            .replaceAll("");
    return removeOrders(withoutXsqlOrders);
}

From source file:by.heap.remark.convert.TextCleaner.java

/**
 * Resets the replacement table and rebuilds the patterns that locate
 * replaceable text, based on the configured options.
 * <p>
 * The entity pattern is always built; the unicode pattern is only assigned
 * when at least one of the unicode options is switched on.
 * @param options Options that will affect what is replaced.
 */
@SuppressWarnings({ "OverlyLongMethod" })
private void setupReplacements(Options options) {
    this.replacements = new HashMap<String, String>();

    // Regex that recognises the HTML entities we know how to replace.
    StringBuilder entityRegex = new StringBuilder(replacements.size() * 5);

    // Special case first: double-encoded HTML entities.
    entityRegex.append("&(?>amp;([#a-z0-9]++;)|(?>");

    // Entities that are always reversed.
    addRepl(entityRegex, "&amp;", "&");
    addRepl(entityRegex, "&lt;", "<");
    addRepl(entityRegex, "&gt;", ">");
    addRepl(entityRegex, "&quot;", "\"");

    if (options.reverseHtmlSmartQuotes) {
        addRepl(entityRegex, "&ldquo;", "\"");
        addRepl(entityRegex, "&rdquo;", "\"");
        addRepl(entityRegex, "&lsquo;", "\'");
        addRepl(entityRegex, "&rsquo;", "\'");
        addRepl(entityRegex, "&apos;", "\'");
        addRepl(entityRegex, "&laquo;", "<<");
        addRepl(entityRegex, "&raquo;", ">>");
    }

    if (options.reverseHtmlSmartPunctuation) {
        addRepl(entityRegex, "&ndash;", "--");
        addRepl(entityRegex, "&mdash;", "---");
        addRepl(entityRegex, "&hellip;", "...");
    }

    // Swap the final character for the closing part of the expression.
    // NOTE(review): presumably addRepl leaves a trailing separator behind - confirm.
    entityRegex.deleteCharAt(entityRegex.length() - 1).append(");)");

    entityReplacementsPattern = Pattern.compile(entityRegex.toString(), Pattern.CASE_INSENSITIVE);

    final boolean unicodeQuotes = options.reverseUnicodeSmartQuotes;
    final boolean unicodePunctuation = options.reverseUnicodeSmartPunctuation;
    if (!unicodePunctuation && !unicodeQuotes) {
        return;
    }

    // Character class with the quoted (\Q...\E) unicode characters to replace.
    StringBuilder unicodeClass = new StringBuilder("[\\Q");
    if (unicodeQuotes) {
        addRepl(unicodeClass, "\u201c", "\""); // left double quotation mark
        addRepl(unicodeClass, "\u201d", "\""); // right double quotation mark
        addRepl(unicodeClass, "\u2018", "\'"); // left single quotation mark
        addRepl(unicodeClass, "\u2019", "\'"); // right single quotation mark
        addRepl(unicodeClass, "\u00ab", "<<"); // left-pointing angle quotation mark
        addRepl(unicodeClass, "\u00bb", ">>"); // right-pointing angle quotation mark
    }
    if (unicodePunctuation) {
        addRepl(unicodeClass, "\u2013", "--"); // en dash
        addRepl(unicodeClass, "\u2014", "---"); // em dash
        addRepl(unicodeClass, "\u2026", "..."); // horizontal ellipsis
    }
    unicodeClass.append("\\E]");
    unicodeReplacementsPattern = Pattern.compile(unicodeClass.toString());
}

From source file:com.ponysdk.impl.query.memory.FilteringTools.java

/**
 * Returns the entries of {@code datas} that match {@code patternMatching}.
 * <p>
 * An entry is kept when it equals the pattern text ignoring case, when the
 * compiled pattern is found in it, or when the compiled pattern is found in
 * the empty string. {@code null} entries are skipped.
 * <p>
 * The pattern is normalised and compiled at most once, on the first entry
 * that actually needs it. A syntax error in the pattern therefore still
 * surfaces at the same point as before, and entries accepted up to then are
 * returned.
 *
 * @param datas           the values to filter; returned as is when {@code null}
 * @param patternMatching the filter expression; when {@code null},
 *                        {@code datas} is returned unfiltered
 * @return a new list with the accepted entries, or {@code datas} itself if
 *         either argument is {@code null}
 */
public static List<String> filter(final List<String> datas, final String patternMatching) {
    if (patternMatching == null || datas == null) {
        return datas;
    }
    final List<String> validData = new ArrayList<>();
    try {
        // Both values depend only on patternMatching, so they are computed
        // lazily once instead of once per element.
        // NOTE(review): assumes normalisePattern has no side effects - confirm.
        Pattern pattern = null;
        boolean matchesEmptyText = false;

        for (final String data : datas) {
            if (data == null) {
                continue;
            }
            if (data.equalsIgnoreCase(patternMatching)) {
                validData.add(data);
                continue;
            }
            // Now we can filter our data against the pattern
            if (pattern == null) {
                final String text = normalisePattern(patternMatching.trim());
                pattern = Pattern.compile(REGEX_BEGIN + text + REGEX_END, Pattern.CASE_INSENSITIVE);
                matchesEmptyText = pattern.matcher("").find();
            }
            if (matchesEmptyText || pattern.matcher(data).find()) {
                validData.add(data);
            }
        }
    } catch (final PatternSyntaxException e) {
        // A malformed filter is a user input problem, not an application error.
        if (log.isDebugEnabled()) {
            log.debug("bad pattern : " + patternMatching);
        }
    } catch (final Exception e) {
        log.error("Filter Error => pattern : " + patternMatching, e);
    }
    return validData;
}

From source file:com.md87.charliebravo.commands.IssueCommand.java

/**
 * Downloads the issue page for the given id from the bug tracker, scrapes
 * the category/value table cells out of it and reports the issue back via
 * the response.
 * <p>
 * Three outcomes are distinguished by looking at the page text: the issue
 * does not exist, the issue is private, or the issue details could be read.
 * In the last case a follow-up carrying all scraped fields is registered.
 *
 * @param handler  the input handler (not used by this method)
 * @param response the response used to send messages and follow-ups
 * @param line     the issue id as typed by the user; appended to the URLs as is
 * @throws Exception if the page cannot be downloaded or an expected field
 *                   (e.g. the summary) is missing from the page
 */
protected void executeOldIssue(InputHandler handler, Response response, String line) throws Exception {
    final List<String> result = Downloader.getPage("http://bugs.dmdirc.com/view.php?id=" + line);
    final StringBuilder builder = new StringBuilder();

    // Join all lines so the DOTALL pattern below can match across them.
    for (String resline : result) {
        builder.append(resline);
    }

    if (builder.indexOf("APPLICATION ERROR #1100") > -1) {
        response.sendMessage("That issue was not found", true);
    } else if (builder.indexOf("<p>Access Denied.</p>") > -1) {
        response.sendMessage("that issue is private. Please see " + "http://bugs.dmdirc.com/view/" + line);
    } else {
        final Map<String, String> data = new HashMap<String, String>();

        // group(1): text of a "category" cell, group(2): text of the cell after it.
        final Pattern pattern = Pattern.compile(
                "<td class=\"category\".*?>\\s*(.*?)\\s*"
                        + "</td>\\s*(?:<!--.*?-->\\s*)?<td.*?>\\s*(.*?)\\s*</td>",
                Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
        final Matcher matcher = pattern.matcher(builder);

        while (matcher.find()) {
            // Locale.ROOT keeps the keys stable: with a locale-sensitive
            // lower-casing (e.g. Turkish) "ID" would not become "id" and the
            // lookups below would fail.
            data.put(matcher.group(1).toLowerCase(java.util.Locale.ROOT), matcher.group(2));
        }

        // substring(9) drops the first nine characters of the summary cell.
        // NOTE(review): presumably a fixed-width id prefix - confirm.
        response.sendMessage("issue " + data.get("id") + " is \"" + data.get("summary").substring(9)
                + "\". Current " + "status is " + data.get("status") + " (" + data.get("resolution")
                + "). See http://bugs.dmdirc.com/view/" + data.get("id"));
        response.addFollowup(new IssueFollowup(data));
    }
}

From source file:de.espend.idea.shopware.util.ShopwareUtil.java

/**
 * Visits every subclass of {@code \Enlight_Controller_Action} whose class
 * name contains {@code _<module>_<controller>} for one of the given modules.
 * Module names are matched case-insensitively.
 *
 * @param project                the project whose PHP index is searched
 * @param controllerClassVisitor receives the class, the matched module name
 *                               and the matched controller name
 * @param modules                module names; joined with {@code |} into the
 *                               regular expression without quoting
 */
public static void collectControllerClass(Project project, ControllerClassVisitor controllerClassVisitor,
        String... modules) {
    Collection<PhpClass> controllerCandidates = PhpIndex.getInstance(project)
            .getAllSubclasses("\\Enlight_Controller_Action");

    String moduleAlternatives = StringUtils.join(modules, "|");
    Pattern controllerNamePattern = Pattern.compile(".*_(" + moduleAlternatives + ")_(\\w+)",
            Pattern.CASE_INSENSITIVE);

    for (PhpClass candidate : controllerCandidates) {
        Matcher nameMatcher = controllerNamePattern.matcher(candidate.getName());
        if (!nameMatcher.find()) {
            continue;
        }
        // group(1) is the module, group(2) the controller part of the name.
        controllerClassVisitor.visitClass(candidate, nameMatcher.group(1), nameMatcher.group(2));
    }
}

From source file:io.apiman.plugins.simpleheaderpolicy.beans.SimpleHeaderPolicyDefBean.java

/**
 * Combines the patterns of all given items into a single case-insensitive
 * alternation ({@code p1|p2|...}). Each pattern is passed through
 * {@code StringUtils.strip} first.
 *
 * @param itemList the items whose patterns are combined
 * @return the compiled alternation
 */
@SuppressWarnings("nls")
private Pattern buildRegex(List<StripHeaderBean> itemList) {
    StringBuilder alternation = new StringBuilder();
    boolean firstItem = true;

    for (StripHeaderBean item : itemList) {
        String stripped = StringUtils.strip(item.getPattern());
        // Every item but the first is preceded by the alternation bar.
        if (!firstItem) {
            alternation.append("|");
        }
        alternation.append(stripped);
        firstItem = false;
    }

    return Pattern.compile(alternation.toString(), Pattern.CASE_INSENSITIVE);
}

From source file:com.intuit.tank.script.replace.AbstractReplacement.java

/**
 * Checks whether the whole value matches the search query, ignoring case.
 * The query is first converted to a regular expression by
 * {@code RegexUtil.wildcardToRegexp}.
 *
 * @param searchQuery
 *            the search query
 * @param replaceString
 *            The replacement string (not used by this check)
 * @param value
 *            The value to be searched in.
 * @return {@code true} if the entire value matches the converted query
 */
private boolean isMatch(String searchQuery, String replaceString, String value) {
    String regex = RegexUtil.wildcardToRegexp(searchQuery);
    return Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(value).matches();
}

From source file:com.ebay.nest.io.sede.RegexSerDe.java

/**
 * Configures this SerDe from the table properties: compiles the input
 * regex, reads the column names and types, builds the row ObjectInspector
 * and allocates the reusable row buffers.
 *
 * @param conf the configuration (not read by this method)
 * @param tbl  the table properties holding "input.regex",
 *             "input.regex.case.insensitive" and the column lists
 * @throws SerDeException if "input.regex" is missing or a column has a
 *                        type this SerDe does not handle
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {

    // Everything we need comes from the table definition.
    inputRegex = tbl.getProperty("input.regex");
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty("input.regex.case.insensitive"));

    // The output format string is no longer supported; tell the user.
    if (tbl.getProperty("output.format.string") != null) {
        LOG.warn("output.format.string has been deprecated");
    }

    // Without a regex there is nothing this SerDe can do.
    if (inputRegex == null) {
        inputPattern = null;
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    int regexFlags = Pattern.DOTALL;
    if (inputRegexIgnoreCase) {
        regexFlags |= Pattern.CASE_INSENSITIVE;
    }
    inputPattern = Pattern.compile(inputRegex, regexFlags);

    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();

    // One java object inspector per column, in column order.
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        columnOIs.add(javaInspectorForColumn(c, columnNames));
    }

    // StandardStruct uses ArrayList to store the row.
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

    // The row object and the output buffers are created once and reused
    // for all rows; the row starts out with one null slot per column.
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}

/**
 * Picks the java object inspector for the column at the given index based
 * on the type recorded in {@code columnTypes}.
 *
 * @param c           index of the column
 * @param columnNames column names, used for the error message only
 * @return the inspector for the column's type
 * @throws SerDeException if the column type is not handled
 */
private ObjectInspector javaInspectorForColumn(int c, List<String> columnNames) throws SerDeException {
    TypeInfo typeInfo = columnTypes.get(c);
    String typeName = typeInfo.getTypeName();

    if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }
    if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaByteObjectInspector;
    }
    if (typeName.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaShortObjectInspector;
    }
    if (typeName.equals(serdeConstants.INT_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    }
    if (typeName.equals(serdeConstants.BIGINT_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
    }
    if (typeName.equals(serdeConstants.FLOAT_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    }
    if (typeName.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    }
    if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
    }
    if (typeName.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
    }
    if (typeName.equals(serdeConstants.DATE_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaDateObjectInspector;
    }
    if (typeName.equals(serdeConstants.DECIMAL_TYPE_NAME)) {
        return PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector;
    }
    if (typeInfo instanceof PrimitiveTypeInfo
            && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.VARCHAR) {
        // The params are looked up (and cast) but not used afterwards; the
        // call is kept so the behaviour stays exactly as before.
        VarcharTypeParams varcharParams = (VarcharTypeParams) ParameterizedPrimitiveTypeUtils
                .getTypeParamsFromTypeInfo(typeInfo);
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) typeInfo);
    }

    throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named "
            + columnNames.get(c) + " with type " + columnTypes.get(c));
}

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

/**
 * Runs the de-identification over generated test documents and reports the
 * true positive / false positive / false negative token counts per document
 * and for the whole run. Reports go to standard out and, when the
 * "elasticgazetteerTestOutput" property is set, to files in that directory.
 */
@Test
public void deidentificationPerformanceTest() {
    dbmsTestUtils.createBasicInputTable();
    dbmsTestUtils.createBasicOutputTable();
    dbmsTestUtils.createDeIdInputTable();
    List<Mutant> mutants = testUtils.insertTestDataForDeidentification(env.getProperty("tblIdentifiers"),
            env.getProperty("tblInputDocs"), mutatortype, true);

    int totalTruePositives = 0;
    int totalFalsePositives = 0;
    int totalFalseNegatives = 0;

    for (Mutant mutant : mutants) {
        Set<Pattern> tokenPatterns = new HashSet<>();
        String deidentified = elasticGazetteerService.deIdentifyString(mutant.getFinalText(),
                String.valueOf(mutant.getDocumentid()));
        mutant.setDeidentifiedString(deidentified);

        // One literal, case-insensitive pattern per distinct output token.
        Set<String> distinctTokens = new HashSet<>(mutant.getOutputTokens());
        tokenPatterns.addAll(distinctTokens.stream()
                .map(token -> Pattern.compile(Pattern.quote(token), Pattern.CASE_INSENSITIVE))
                .collect(Collectors.toSet()));

        // Collect every occurrence of a token in the final text. The list is
        // not read again below.
        List<MatchResult> occurrences = new ArrayList<>();
        for (Pattern tokenPattern : tokenPatterns) {
            Matcher tokenMatcher = tokenPattern.matcher(mutant.getFinalText());
            while (tokenMatcher.find()) {
                occurrences.add(tokenMatcher.toMatchResult());
            }
        }

        int truePositives = getTruePositiveTokenCount(mutant);
        int falsePositives = getFalsePositiveTokenCount(mutant);
        int falseNegatives = getFalseNegativeTokenCount(mutant);

        printDocumentReport(mutant, truePositives, falsePositives, falseNegatives);
        writeDocumentReport(mutant, truePositives, falsePositives, falseNegatives);

        totalTruePositives += truePositives;
        totalFalsePositives += falsePositives;
        totalFalseNegatives += falseNegatives;
    }
    DecimalFormat df = new DecimalFormat("#.#");
    df.setRoundingMode(RoundingMode.CEILING);

    printRunSummary(df, totalTruePositives, totalFalsePositives, totalFalseNegatives);
    writeRunSummary(totalTruePositives, totalFalsePositives, totalFalseNegatives);
}

/** Prints the per-document figures and texts to standard out. */
private void printDocumentReport(Mutant mutant, int truePositives, int falsePositives, int falseNegatives) {
    System.out.println("Doc ID " + mutant.getDocumentid() + " has " + falseNegatives
            + " unmasked identifiers from a total of " + (falseNegatives + truePositives));
    System.out.println("Doc ID " + mutant.getDocumentid() + " has " + falsePositives
            + " inaccurately masked tokens from a total of " + (falsePositives + truePositives));
    System.out.println("TP: " + truePositives + " FP: " + falsePositives + " FN: " + falseNegatives);
    System.out.println("Doc ID precision " + calcPrecision(falsePositives, truePositives));
    System.out.println("Doc ID recall " + calcRecall(falseNegatives, truePositives));
    System.out.println(mutant.getDeidentifiedString());
    System.out.println(mutant.getFinalText());
    System.out.println(mutant.getInputTokens());
    System.out.println(mutant.getOutputTokens());
    System.out.println();
}

/**
 * Writes the per-document report to a file named after the document id, if
 * an output directory is configured. I/O failures are only printed.
 */
private void writeDocumentReport(Mutant mutant, int truePositives, int falsePositives, int falseNegatives) {
    if (env.getProperty("elasticgazetteerTestOutput") == null) {
        return;
    }
    try (BufferedWriter bw = new BufferedWriter(
            new FileWriter(new File(env.getProperty("elasticgazetteerTestOutput") + File.separator
                    + mutant.getDocumentid())))) {
        bw.write("Doc ID " + mutant.getDocumentid() + " has " + falseNegatives
                + " unmasked identifiers from a total of " + (falseNegatives + truePositives));
        bw.newLine();
        bw.write("Doc ID " + mutant.getDocumentid() + " has " + falsePositives
                + " inaccurately masked tokens from a total of "
                + (falsePositives + truePositives));
        bw.newLine();
        bw.write("TP: " + truePositives + " FP: " + falsePositives + " FN: " + falseNegatives);
        bw.newLine();
        bw.write("Doc ID precision " + calcPrecision(falsePositives, truePositives));
        bw.newLine();
        bw.write("Doc ID recall " + calcRecall(falseNegatives, truePositives));
        bw.newLine();
        bw.write(mutant.getDeidentifiedString());
        bw.newLine();
        bw.write(mutant.getFinalText());
        bw.newLine();
        bw.write(mutant.getInputTokens().toString());
        bw.newLine();
        bw.write(mutant.getOutputTokens().toString());
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Prints the totals of the run, followed by a LaTeX-style table row. */
private void printRunSummary(DecimalFormat df, int totalTruePositives, int totalFalsePositives,
        int totalFalseNegatives) {
    System.out.println();
    System.out.println();
    System.out.println("THIS RUN TP: " + totalTruePositives + " FP: " + totalFalsePositives + " FN: "
            + totalFalseNegatives);
    System.out.println("Doc ID precision " + calcPrecision(totalFalsePositives, totalTruePositives));
    System.out.println("Doc ID recall " + calcRecall(totalFalseNegatives, totalTruePositives));
    System.out.println(totalTruePositives + " & " + totalFalsePositives + " & " + totalFalseNegatives + " & "
            + df.format(calcPrecision(totalFalsePositives, totalTruePositives)) + " & "
            + df.format(calcRecall(totalFalseNegatives, totalTruePositives)) + " \\\\");
}

/**
 * Writes the totals of the run to the file "summary", if an output
 * directory is configured. I/O failures are only printed.
 */
private void writeRunSummary(int totalTruePositives, int totalFalsePositives, int totalFalseNegatives) {
    if (env.getProperty("elasticgazetteerTestOutput") == null) {
        return;
    }
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(
            new File(env.getProperty("elasticgazetteerTestOutput") + File.separator + "summary")))) {
        bw.write("THIS RUN TP: " + totalTruePositives + " FP: " + totalFalsePositives + " FN: "
                + totalFalseNegatives);
        bw.newLine();
        bw.write("Doc ID precision " + calcPrecision(totalFalsePositives, totalTruePositives));
        bw.newLine();
        bw.write("Doc ID recall " + calcRecall(totalFalseNegatives, totalTruePositives));
    } catch (IOException e) {
        e.printStackTrace();
    }
}