Example usage for java.util.regex Matcher start

List of usage examples for java.util.regex Matcher start

Introduction

On this page you can find example usages of java.util.regex.Matcher.start().

Prototype

public int start() 

Source Link

Document

Returns the start index of the previous match.

Usage

From source file:com.civprod.writerstoolbox.NaturalLanguage.util.RegexStringTokenizer.java

/**
 * Splits each word of {@code ReturnList} around the matches of {@code possivePattern}.
 *
 * <p>A word that is fully matched by any of {@code ignorePatterns} is copied to the result
 * unchanged. Every other word is cut into the text before each match, the match itself and
 * the text after the last match; empty pieces are dropped, so an empty word contributes
 * nothing to the result.
 *
 * @param ReturnList     words to split; not modified
 * @param possivePattern pattern whose matches become tokens of their own
 * @param ignorePatterns patterns marking words that must be kept whole
 * @param sizeGuess      initial capacity hint for the result list; negative values are
 *                       treated as 0
 * @return a new list with the resulting tokens, in input order
 */
private static List<String> Tokenize(List<String> ReturnList, Pattern possivePattern,
        List<Pattern> ignorePatterns, int sizeGuess) {
    // sizeGuess is only a hint: clamp it so a bad guess cannot make ArrayList throw.
    List<String> tokens = new java.util.ArrayList<>(Math.max(sizeGuess, 0));
    for (String word : ReturnList) {
        // Sequential on purpose: the check runs once per word over a short pattern list,
        // so a parallel stream would only add fork/join overhead.
        boolean keepWhole = ignorePatterns.stream()
                .anyMatch(ignorePattern -> ignorePattern.matcher(word).matches());
        if (keepWhole) {
            tokens.add(word);
            continue;
        }

        Matcher matcher = possivePattern.matcher(word);
        int lastEnd = 0;
        while (matcher.find()) {
            // Text between the previous match (or the start of the word) and this match.
            if (matcher.start() > lastEnd) {
                tokens.add(word.substring(lastEnd, matcher.start()));
            }
            // Zero-length matches produce no token.
            String match = matcher.group();
            if (!match.isEmpty()) {
                tokens.add(match);
            }
            lastEnd = matcher.end();
        }
        // Whatever follows the last match (the whole word when nothing matched).
        if (lastEnd < word.length()) {
            tokens.add(word.substring(lastEnd));
        }
    }
    return tokens;
}

From source file:fr.gouv.culture.thesaurus.util.TextUtils.java

/**
 * Abbrge (dans la mesure du possible possible) le texte sans tronquer les
 * mots, en partant de la droite./*  www .  ja  v  a  2 s .c  o m*/
 * 
 * @param texte
 *            Texte  abbrger (peut tre <code>null</code>)
 * @param maxWidth
 *            Longueur max de la chane en sortie
 * @return Chane abbrge, ou <code>null</code> si le texte en entre tait
 *         <code>null</code>
 * 
 * @see StringUtils#abbreviate(String, int, int)
 */
public static String rightAbbreviateOnWords(final String texte, final int maxWidth) {
    String abbreviatedVersion;

    if (maxWidth < ELLIPSIS.length()) {
        throw new IllegalArgumentException("Max length is insufficient.");
    }

    if (texte == null || texte.length() <= maxWidth) {
        abbreviatedVersion = texte;
    } else {
        String abbreviated = texte.substring(texte.length() - maxWidth + ELLIPSIS.length());
        Matcher matcher = NON_WORD_CHARACTER.matcher(abbreviated);
        int firstWordIndex = matcher.find() ? matcher.start() : 0;
        abbreviatedVersion = ELLIPSIS + abbreviated.substring(firstWordIndex);
    }

    return abbreviatedVersion;
}

From source file:com.centurylink.mdw.util.ExpressionUtil.java

/**
 * Input is email template with image tags:
 * <code>/*from   w  w w . ja v a  2s.  c om*/
 * &lt;img src="${image:com.centurylink.mdw.base/mdw.png}" alt="MDW"&gt;
 * </code>
 * Uses the unqualified image name as its CID.  Populates imageMap with results.
 */
public static String substituteImages(String input, Map<String, String> imageMap) {
    StringBuffer substituted = new StringBuffer(input.length());
    Matcher matcher = tokenPattern.matcher(input);
    int index = 0;
    while (matcher.find()) {
        String match = matcher.group();
        substituted.append(input.substring(index, matcher.start()));
        if (imageMap != null && (match.startsWith("${image:"))) {
            String imageFile = match.substring(8, match.length() - 1);
            String imageId = imageFile.substring(imageFile.lastIndexOf('/') + 1);
            substituted.append("cid:" + imageId);
            imageMap.put(imageId, imageFile);
        } else {
            // ignore everything but images
            substituted.append(match);
        }
        index = matcher.end();
    }
    substituted.append(input.substring(index));
    return substituted.toString();
}

From source file:com.gs.obevo.db.impl.platforms.oracle.OracleReveng.java

/**
 * Builds the list of patterns used to recognise Oracle DDL statements.
 *
 * <p>Each {@code RevengPattern} pairs a change type with a case-insensitive regular
 * expression for the matching {@code CREATE}/{@code ALTER} statement. The keywords
 * {@code OR REPLACE}, {@code EDITIONABLE} and {@code UNIQUE} are optional in Oracle DDL, so
 * their groups are marked optional in the patterns below.
 *
 * @return the immutable list of patterns, in the order given below
 */
private static ImmutableList<RevengPattern> getRevengPatterns() {
    final String schemaNameSubPattern = getSchemaObjectPattern(QUOTE, QUOTE);
    String schemaSysNamePattern = getSchemaObjectWithPrefixPattern(QUOTE, QUOTE, "SYS_");
    NamePatternType namePatternType = NamePatternType.TWO;

    // need this function to split the package and package body lines, as the Oracle reveng function combines them together
    Function<String, LineParseOutput> prependBodyLineToPackageBody = new Function<String, LineParseOutput>() {
        // "or replace" and "editionable" are optional keywords, hence the '?' quantifiers.
        private final Pattern packageBodyPattern = Pattern
                .compile("(?i)create\\s+(?:or\\s+replace\\s+)?(?:editionable\\s+)?package\\s+body\\s+"
                        + schemaNameSubPattern, Pattern.DOTALL);

        @Override
        public LineParseOutput valueOf(String object) {
            Matcher matcher = packageBodyPattern.matcher(object);
            if (matcher.find()) {
                // Insert a marker line right before the "create ... package body" statement.
                String output = object.substring(0, matcher.start()) + "\n//// BODY\n"
                        + object.substring(matcher.start());
                return new LineParseOutput(output);
            }
            return new LineParseOutput(object);
        }
    };
    // NOTE(review): the trigger pattern still requires "or replace" and does not allow
    // "editionable" - left unchanged here; confirm whether it should be relaxed as well.
    return Lists.immutable.with(
            new AbstractDdlReveng.RevengPattern(ChangeType.SEQUENCE_STR, namePatternType,
                    "(?i)create\\s+(?:or\\s+replace\\s+)?sequence\\s+" + schemaNameSubPattern)
                            .withPostProcessSql(REPLACE_TABLESPACE).withPostProcessSql(REMOVE_QUOTES),
            new AbstractDdlReveng.RevengPattern(ChangeType.TABLE_STR, namePatternType,
                    "(?i)create\\s+table\\s+" + schemaNameSubPattern).withPostProcessSql(REPLACE_TABLESPACE)
                            .withPostProcessSql(REMOVE_QUOTES),
            new AbstractDdlReveng.RevengPattern(ChangeType.TABLE_STR, namePatternType,
                    "(?i)alter\\s+table\\s+" + schemaNameSubPattern).withPostProcessSql(REMOVE_QUOTES),
            // Unique indexes whose name carries the SYS_ prefix get a dedicated annotation.
            new AbstractDdlReveng.RevengPattern(ChangeType.TABLE_STR, namePatternType,
                    "(?i)create\\s+unique\\s+index\\s+" + schemaSysNamePattern + "\\s+on\\s+"
                            + schemaNameSubPattern,
                    2, 1,
                    "excludeEnvs=\"%\" comment=\"this_is_potentially_a_redundant_primaryKey_index_please_double_check\"")
                            .withPostProcessSql(REPLACE_TABLESPACE).withPostProcessSql(REMOVE_QUOTES),
            // Any other index, unique or not.
            new AbstractDdlReveng.RevengPattern(ChangeType.TABLE_STR, namePatternType,
                    "(?i)create\\s+(?:unique\\s+)?index\\s+" + schemaNameSubPattern + "\\s+on\\s+"
                            + schemaNameSubPattern,
                    2, 1, "INDEX").withPostProcessSql(REPLACE_TABLESPACE).withPostProcessSql(REMOVE_QUOTES),
            new AbstractDdlReveng.RevengPattern(ChangeType.FUNCTION_STR, namePatternType,
                    "(?i)create\\s+(?:or\\s+replace\\s+)?(?:force\\s+)?(?:editionable\\s+)?function\\s+"
                            + schemaNameSubPattern),
            new AbstractDdlReveng.RevengPattern(ChangeType.VIEW_STR, namePatternType,
                    "(?i)create\\s+(?:or\\s+replace\\s+)?(?:force\\s+)?(?:editionable\\s+)?view\\s+"
                            + schemaNameSubPattern),
            new AbstractDdlReveng.RevengPattern(ChangeType.SP_STR, namePatternType,
                    "(?i)create\\s+(?:or\\s+replace\\s+)?(?:editionable\\s+)?procedure\\s+"
                            + schemaNameSubPattern),
            new AbstractDdlReveng.RevengPattern(ChangeType.PACKAGE_STR, namePatternType,
                    "(?i)create\\s+(?:or\\s+replace\\s+)?(?:editionable\\s+)?package\\s+" + schemaNameSubPattern)
                            .withPostProcessSql(prependBodyLineToPackageBody),
            new AbstractDdlReveng.RevengPattern(ChangeType.SYNONYM_STR, namePatternType,
                    "(?i)create\\s+(?:or\\s+replace\\s+)?(?:editionable\\s+)?synonym\\s+" + schemaNameSubPattern),
            new AbstractDdlReveng.RevengPattern(ChangeType.TRIGGER_STR, namePatternType,
                    "(?i)create\\s+or\\s+replace\\s+trigger\\s+" + schemaNameSubPattern));
}

From source file:Main.java

/**
 * Unescapes an escaped file or directory name back to its original value.
 *
 * <p>See {@link #escapeFileName(String)} for more information.
 *
 * @param fileName File name to be unescaped.
 * @return The original value of the file name before it was escaped,
 *    or null if the escaped fileName seems invalid.
 */
public static String unescapeFileName(String fileName) {
    int length = fileName.length();
    int percentCharacterCount = 0;
    for (int i = 0; i < length; i++) {
        if (fileName.charAt(i) == '%') {
            percentCharacterCount++;
        }
    }
    if (percentCharacterCount == 0) {
        return fileName;
    }

    // Each '%' is expected to start a three-character escape that collapses to one character.
    int expectedLength = length - percentCharacterCount * 2;
    if (expectedLength < 0) {
        // More '%' characters than complete escapes can fit (e.g. "%"): the name is invalid.
        // Without this guard the StringBuilder constructor below would throw
        // NegativeArraySizeException instead of the documented null being returned.
        return null;
    }
    StringBuilder builder = new StringBuilder(expectedLength);
    Matcher matcher = ESCAPED_CHARACTER_PATTERN.matcher(fileName);
    int endOfLastMatch = 0;
    while (percentCharacterCount > 0 && matcher.find()) {
        // Group 1 holds the hexadecimal code of the escaped character.
        char unescapedCharacter = (char) Integer.parseInt(matcher.group(1), 16);
        builder.append(fileName, endOfLastMatch, matcher.start()).append(unescapedCharacter);
        endOfLastMatch = matcher.end();
        percentCharacterCount--;
    }
    if (endOfLastMatch < length) {
        builder.append(fileName, endOfLastMatch, length);
    }
    // A length mismatch means some '%' was not part of a valid escape sequence.
    if (builder.length() != expectedLength) {
        return null;
    }
    return builder.toString();
}

From source file:com.google.code.maven.plugin.http.client.utils.HttpEntityUtils.java

/**
 * Determines the charset to use for an HTTP entity.
 *
 * <p>If the entity has a content encoding, a charset is looked up from it. Otherwise the
 * content type value is searched with {@code CONTENT_TYPE_CHARSET_PATTERN}; the charset name
 * is the text following that match, up to the next match of
 * {@code CONTENT_TYPE_CHARSET_DELIMITER_PATTERN} or the end of the value. When no charset
 * can be resolved a warning is logged and {@code defaultCharset} is returned.
 *
 * @param entity         entity to inspect
 * @param defaultCharset charset returned when none can be determined
 * @param log            receives warnings
 * @return the resolved charset, or {@code defaultCharset}
 */
public static Charset getEncoding(HttpEntity entity, Charset defaultCharset, Log log) {
    try {
        if (entity.getContentEncoding() != null) {
            // NOTE(review): getName() looks like it yields the header name rather than its
            // value - confirm against the HttpEntity/Header API.
            return Charset.forName(entity.getContentEncoding().getName());
        }
        if (entity.getContentType() == null) {
            log.warn("encoding not defined in content encoding nor in content type");
        } else {
            String type = entity.getContentType().getValue();
            if (type != null) {
                Matcher charsetMatcher = CONTENT_TYPE_CHARSET_PATTERN.matcher(type);
                if (charsetMatcher.find()) {
                    // The charset name starts right after the charset marker ...
                    int nameStart = charsetMatcher.end();
                    // ... and runs up to the next delimiter, or to the end of the value.
                    Matcher delimiterMatcher = CONTENT_TYPE_CHARSET_DELIMITER_PATTERN.matcher(type);
                    int nameEnd = delimiterMatcher.find(nameStart) ? delimiterMatcher.start() : type.length();
                    return Charset.forName(type.substring(nameStart, nameEnd));
                }
            }
        }
    } catch (IllegalCharsetNameException icne) {
        log.warn("failed to determine response encoding", icne);
    } catch (UnsupportedCharsetException uce) {
        log.warn("failed to determine response encoding", uce);
    }
    log.warn("back to default platform encoding " + DEFAULT_PLATFORM_ENCODING);
    return defaultCharset;
}

From source file:com.centurylink.mdw.util.ExpressionUtil.java

/**
 * Replaces every expression token in the input string with its variable value.
 *
 * <p>Tokens are located with {@code tokenPattern}; the expression is the token without its
 * first two characters and its last character. A token whose value resolves to null is
 * removed from the output.
 *
 * @param input raw input string
 * @param variables variable instances to use in substitutions
 * @return string with values substituted
 * @throws MdwException if anything goes wrong while substituting
 */
public static String substitute(String input, List<VariableInstance> variables) throws MdwException {
    StringBuilder result = new StringBuilder(input.length());
    try {
        Matcher tokens = tokenPattern.matcher(input);
        int cursor = 0;
        while (tokens.find()) {
            String token = tokens.group();
            // Literal text between the previous token and this one.
            result.append(input, cursor, tokens.start());

            String expression = token.substring(2, token.length() - 1);
            Object value = getVariableValue(expression, variables);
            if (value != null) {
                result.append(value);
            }
            cursor = tokens.end();
        }
        // Remainder after the last token.
        result.append(input, cursor, input.length());
        return result.toString();
    } catch (Exception ex) {
        throw new MdwException("Error substituting expression value(s) in input: '" + input + "'", ex);
    }
}

From source file:com.ms.commons.utilities.CharTools.java

/**
 *  T&#24676;  T??  unicode ? 10?/*from  w  ww  .j a  v a 2s .  com*/
 * 
 * @param str
 * @return
 */
public static String unicodeUrlDecode(String str) {
    if (StringUtils.isBlank(str)) {
        return str;
    }
    StringBuilder sb = new StringBuilder();
    Matcher matcher = unicode_url_pattern.matcher(str);
    int preEnd = 0;
    while (matcher.find()) {
        String group = matcher.group(1);
        int start = matcher.start();
        sb.append(str.substring(preEnd, start));
        sb.append(fromCharCode(Integer.parseInt(group)));
        preEnd = matcher.end();
    }
    sb.append(str.substring(preEnd, str.length()));
    return sb.toString();
}

From source file:com.revolsys.util.JexlUtil.java

/**
 * <p>/*w w w.ja  v a2 s .c  o  m*/
 * Convert expressions into valid JexlExpressions, if the string does not
 * contain any expressions that match the expressionPattern then null will be
 * returned and the caller can use the raw string.
 * </p>
 * <p>
 * The expressionPattern can be used to define an alternate pattern than the
 * {@link #DEFAULT_EXPRESSION_PATTERN} that defines expressions in the form
 * ${el}. The pattern is defined as a Java Regular expression. The contents of
 * the expression part of the pattern must be enclosed in () to define the
 * group. The characters outside the first group will be removed from the
 * string and the expression portion will be added to the expression.
 * </p>
 *
 * @param expression The string containing expressions.
 * @param expressionPattern The regular expression pattern used to identify
 *          expressions in the string. The first group in the expression will
 *          be used as the expression.
 * @return The expression object for the string expression.
 * @throws Exception If there was an error creating the expression.
 */
public static Expression newExpression(final String expression, final String expressionPattern)
        throws Exception {
    final String newExpression = expression.replaceAll("\n", "");
    // Wrap the entires expression in '' and replace the expressions in the
    // form "${expr)" to ' + expr + '
    final Pattern compiledPattern = Pattern.compile(expressionPattern);
    final Matcher matcher = compiledPattern.matcher(newExpression);
    int lastEnd = 0;
    if (matcher.find()) {
        final StringBuilder jexlExpression = new StringBuilder();
        do {
            final int startIndex = matcher.start();
            if (startIndex != lastEnd) {
                final String text = newExpression.substring(lastEnd, startIndex);
                addText(jexlExpression, text);
                jexlExpression.append(" + ");
            }
            final String matchedExpression = matcher.group(1);
            jexlExpression.append(matchedExpression).append(" + ");
            lastEnd = matcher.end();
        } while (matcher.find());
        addText(jexlExpression, newExpression.substring(lastEnd));

        // Remove any empty strings from the expression to improve
        // performance
        String expr = jexlExpression.toString();
        expr = expr.replaceAll(" \\+ '' \\+ ", " + ");
        expr = expr.replaceAll("^'' \\+ ", "");
        expr = expr.replaceAll("\\+ ''$", "");
        return ExpressionFactory.createExpression(expr);
    } else {
        return null;
    }
}

From source file:com.centurylink.mdw.util.ExpressionUtil.java

/**
 * Substitutes dynamic values for expressions in the input string.
 * @param input raw input string/*ww w. ja  va2 s.  com*/
 * @param model object containing the values to substitute
 * @param map of images to populate based on special ${image:*.gif} syntax
 * @return string with values substituted
 */
public static String substitute(String input, Object model, Map<String, String> imageMap, boolean lenient)
        throws MdwException {
    StringBuffer substituted = new StringBuffer(input.length());
    try {
        Matcher matcher = tokenPattern.matcher(input);
        int index = 0;
        while (matcher.find()) {
            String match = matcher.group();
            substituted.append(input.substring(index, matcher.start()));
            if (imageMap != null && (match.startsWith("${image:") || match.startsWith("#{image:"))) {
                String imageFile = match.substring(8, match.length() - 1);
                String imageId = imageFile.substring(0, imageFile.lastIndexOf('.'));
                substituted.append("cid:" + imageId);
                imageMap.put(imageId, imageFile);
            } else if (match.startsWith("#{")) { // ignore #{... in favor of facelets (except images)
                substituted.append(match);
            } else {
                Object value;
                if (lenient) {
                    try {
                        value = propUtilsBean.getProperty(model, match.substring(2, match.length() - 1));
                        if (value == null)
                            value = match;
                    } catch (Exception e) {
                        value = match;
                    }
                } else {
                    value = propUtilsBean.getProperty(model, match.substring(2, match.length() - 1));
                }
                if (value != null)
                    substituted.append(value);
            }
            index = matcher.end();
        }
        substituted.append(input.substring(index));
        return substituted.toString();
    } catch (Exception ex) {
        throw new MdwException("Error substituting expression value(s)", ex);
    }
}