Example usage for java.util.regex Matcher start

List of usage examples for java.util.regex Matcher start

Introduction

On this page you can find usage examples for the start method of java.util.regex.Matcher.

Prototype

public int start(String name) 

Source Link

Document

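Returns the start index of the subsequence captured by the given named-capturing group during the previous match operation. Note that the examples below call the start(int group) overload, which identifies the capturing group by its index rather than by its name.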

Usage

From source file:com.akop.bach.parser.XboxLiveParser.java

/**
 * Rewrites a gamerpic URL when it matches one of the two known formats.
 * <p>
 * For either format the result is the part of {@code iconUrl} that precedes
 * capture group 1, followed by a fixed marker ({@code "2"} for the classic
 * format, {@code "l"} for the avatar format), followed by capture group 2.
 * The classic pattern is tried first.
 *
 * @param iconUrl the gamerpic URL to rewrite
 * @return the rewritten URL, or {@code iconUrl} itself when neither pattern
 *         is found in it
 */
public static String getLargeGamerpic(String iconUrl) {
    // Non-avatar (classic) gamerpic
    final Matcher classic = PATTERN_GAMERPIC_CLASSIC.matcher(iconUrl);
    if (classic.find()) {
        final String head = iconUrl.substring(0, classic.start(1));
        return String.format("%s2%s", head, classic.group(2));
    }

    // Avatar (NXE) gamerpic
    final Matcher avatar = PATTERN_GAMERPIC_AVATAR.matcher(iconUrl);
    if (avatar.find()) {
        final String head = iconUrl.substring(0, avatar.start(1));
        return String.format("%sl%s", head, avatar.group(2));
    }

    // Neither format applies: optionally report it, then hand the input back.
    if (App.getConfig().logToConsole()) {
        App.logv("%s has an unrecognized format; returning original", iconUrl);
    }
    return iconUrl;
}

From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java

/**
 * Scans the attributes of a single element and dispatches every attribute
 * value to the handler that fits the attribute kind.
 * <p>
 * Each match of {@code EACH_ATTRIBUTE_EXTRACTOR} describes one attribute. The
 * attribute value is taken from whichever of groups 14, 15 or 16 took part in
 * the match; the attribute kind is identified by which of groups 2-13 took
 * part (HREF, ACTION, ON*, SRC, CODEBASE, CLASSID/DATA, ARCHIVE, CODE, VALUE,
 * STYLE, METHOD, other). CODEBASE/resource values and the form ACTION/METHOD
 * pair are only collected inside the loop and are resolved after all
 * attributes have been seen.
 *
 * @param curi the URI being processed; receives discovered links, embeds
 *        and errors
 * @param element the element name, compared case-insensitively against the
 *        known tag names
 * @param cs the character sequence that is scanned for attributes
 */
protected void processGeneralTag(CrawlURI curi, CharSequence element, CharSequence cs) {

    Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs);

    // Just in case it's an OBJECT or APPLET tag
    String codebase = null;
    ArrayList<String> resources = null;

    // Just in case it's a FORM
    CharSequence action = null;
    CharSequence actionContext = null;
    CharSequence method = null;

    final boolean framesAsEmbeds = ((Boolean) getUncheckedAttribute(curi, ATTR_TREAT_FRAMES_AS_EMBED_LINKS))
            .booleanValue();

    final boolean ignoreFormActions = ((Boolean) getUncheckedAttribute(curi, ATTR_IGNORE_FORM_ACTION_URLS))
            .booleanValue();

    final boolean extractValueAttributes = ((Boolean) getUncheckedAttribute(curi, EXTRACT_VALUE_ATTRIBUTES))
            .booleanValue();

    final String elementStr = element.toString();

    while (attr.find()) {
        // The value sits in exactly one of three alternative groups.
        int valueGroup = (attr.start(14) > -1) ? 14 : (attr.start(15) > -1) ? 15 : 16;
        int start = attr.start(valueGroup);
        int end = attr.end(valueGroup);
        assert start >= 0 : "Start is: " + start + ", " + curi;
        assert end >= 0 : "End is :" + end + ", " + curi;
        CharSequence value = cs.subSequence(start, end);
        value = TextUtils.unescapeHtml(value);
        if (attr.start(2) > -1) {
            // HREF
            CharSequence context = Link.elementContext(element, attr.group(2));
            if (elementStr.equalsIgnoreCase(LINK)) {
                // <LINK> elements treated as embeds (css, ico, etc)
                processEmbed(curi, value, context);
            } else {
                // other HREFs treated as links
                processLink(curi, value, context);
            }
            if (elementStr.equalsIgnoreCase(BASE)) {
                try {
                    curi.setBaseURI(value.toString());
                } catch (URIException e) {
                    if (getController() != null) {
                        // Controller can be null: e.g. when running
                        // ExtractorTool.
                        getController().logUriError(e, curi.getUURI(), value.toString());
                    } else {
                        logger.info("Failed set base uri: " + curi + ", " + value.toString() + ": "
                                + e.getMessage());
                    }
                }
            }
        } else if (attr.start(3) > -1) {
            // ACTION
            if (!ignoreFormActions) {
                action = value;
                actionContext = Link.elementContext(element, attr.group(3));
                // handling finished only at end (after METHOD also collected)
            }
        } else if (attr.start(4) > -1) {
            // ON____
            processScriptCode(curi, value); // TODO: context?
        } else if (attr.start(5) > -1) {
            // SRC etc.
            CharSequence context = Link.elementContext(element, attr.group(5));

            // true, if we expect another HTML page instead of an image etc.
            final char hopType;

            if (!framesAsEmbeds
                    && (elementStr.equalsIgnoreCase(FRAME) || elementStr.equalsIgnoreCase(IFRAME))) {
                hopType = Link.NAVLINK_HOP;
            } else {
                hopType = Link.EMBED_HOP;
            }
            processEmbed(curi, value, context, hopType);
        } else if (attr.start(6) > -1) {
            // CODEBASE
            codebase = value.toString();
            CharSequence context = Link.elementContext(element, attr.group(6));
            processEmbed(curi, codebase, context);
        } else if (attr.start(7) > -1) {
            // CLASSID, DATA
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            resources.add(value.toString());
        } else if (attr.start(8) > -1) {
            // ARCHIVE: a whitespace-separated list of resources
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            for (String archive : TextUtils.split(WHITESPACE, value)) {
                resources.add(archive);
            }
        } else if (attr.start(9) > -1) {
            // CODE
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            // If element is applet and code value does not end with
            // '.class' then append '.class' to the code value.
            if (elementStr.equalsIgnoreCase(APPLET) && !value.toString().toLowerCase().endsWith(CLASSEXT)) {
                resources.add(value.toString() + CLASSEXT);
            } else {
                resources.add(value.toString());
            }
        } else if (attr.start(10) > -1) {
            // VALUE, with possibility of URI
            if (extractValueAttributes && TextUtils.matches(LIKELY_URI_PATH, value)) {
                CharSequence context = Link.elementContext(element, attr.group(10));
                processLink(curi, value, context);
            }

        } else if (attr.start(11) > -1) {
            // STYLE inline attribute
            // then, parse for URIs
            this.numberOfLinksExtracted += ExtractorCSS.processStyleCode(curi, value, getController());

        } else if (attr.start(12) > -1) {
            // METHOD
            method = value;
            // form processing finished at end (after ACTION also collected)
        } else if (attr.start(13) > -1) {
            // any other attribute
            // ignore for now
            // could probe for path- or script-looking strings, but
            // those should be vanishingly rare in other attributes,
            // and/or symptomatic of page bugs
        }
    }
    TextUtils.recycleMatcher(attr);

    // finish handling codebase/resources now that all available
    if (resources != null) {
        UURI codebaseURI = null;
        // Kept outside the loop so the failing resource can be reported below.
        String res = null;
        try {
            if (codebase != null) {
                // TODO: Pass in the charset.
                codebaseURI = UURIFactory.getInstance(curi.getUURI(), codebase);
            }
            for (String resource : resources) {
                res = resource;
                res = (String) TextUtils.unescapeHtml(res);
                if (codebaseURI != null) {
                    res = codebaseURI.resolve(res).toString();
                }
                processEmbed(curi, res, element); // TODO: include attribute too
            }
        } catch (URIException e) {
            curi.addLocalizedError(getName(), e, "BAD CODEBASE " + codebase);
        } catch (IllegalArgumentException e) {
            DevUtils.logger.log(Level.WARNING, "processGeneralTag()\n" + "codebase=" + codebase + " res=" + res
                    + "\n" + DevUtils.extraInfo(), e);
        }
    }

    // finish handling form action, now method is available
    if (action != null) {
        // Follow the action unless only GET forms are wanted and this form
        // explicitly declares a different method.
        if (method == null || "GET".equalsIgnoreCase(method.toString())
                || !((Boolean) getUncheckedAttribute(curi, ATTR_EXTRACT_ONLY_FORM_GETS)).booleanValue()) {
            processLink(curi, action, actionContext);
        }
    }
}

From source file:birch.filter.EncryptionFilter.java

/**
 * Handles an incoming message, decrypting its text when the conversation
 * target is on the encryption list.
 * <p>
 * The message is first passed through {@code linebreakFilter}. If the
 * filtered message is empty, does not match {@code messagePattern}, or its
 * target is not in the encryption list, the filtered message is returned
 * as is. Otherwise the message is rebuilt from groups 1-3 of the match and
 * the text in group 4 is either decrypted (when it matches
 * {@code activeEncryptedPattern}) or prefixed with {@code plainPrefix}.
 *
 * @param message the raw incoming message
 * @return the filtered message, possibly rebuilt with decrypted or
 *         plain-marked text
 */
protected String doReceive(String message) {
    final String filtered = linebreakFilter.receive(message);
    if (filtered.length() == 0) {
        return filtered;
    }

    final Matcher parsed = messagePattern.matcher(filtered);
    if (!parsed.matches()) {
        return filtered;
    }

    final String sender = parsed.group(1);
    final String recipient = parsed.group(3);

    // The target is the recipient itself when it starts with '#'; otherwise
    // it is cut out of the sender, from index 1 up to the first '!'.
    final String target;
    if (recipient.startsWith("#")) {
        target = recipient;
    } else {
        target = sender.substring(1, sender.indexOf('!'));
    }

    if (!inEncryptionList(target)) {
        return filtered;
    }

    final StringBuilder rebuilt = new StringBuilder();
    rebuilt.append(":").append(sender);
    rebuilt.append(" ").append(parsed.group(2));
    rebuilt.append(" ").append(recipient);
    rebuilt.append(" :");

    final String text = parsed.group(4);
    final Matcher encrypted = activeEncryptedPattern.matcher(text);
    if (encrypted.matches()) {
        // The last capture group holds the payload to decrypt; everything
        // in front of it is copied through unchanged.
        final int payloadGroup = encrypted.groupCount();
        rebuilt.append(text.substring(0, encrypted.start(payloadGroup)));
        rebuilt.append(decrypt(target, encrypted.group(payloadGroup)));
    } else {
        rebuilt.append(plainPrefix);
        rebuilt.append(text);
    }

    return rebuilt.toString();
}

From source file:net.java.sip.communicator.impl.gui.main.chat.ChatConversationPanel.java

/**
 * Runs the given replacers over the content as a pipeline: the result
 * produced by one replacer becomes the input of the next.
 * <p>
 * A {@link Replacer} whose {@link Replacer#expectsPlainText()} returns
 * {@code false} is handed the complete current content in one call. A
 * replacer that expects plain text is instead called once for every piece
 * found by {@code TEXT_TO_REPLACE_PATTERN}, with the piece HTML-unescaped;
 * the content between those pieces is copied through unchanged. Which
 * plain-text pieces a replacer sees cannot be predicted, because earlier
 * replacers in the chain change the content.
 * </p>
 * <p>
 * A {@link RuntimeException} thrown by a replacer is logged and processing
 * continues.
 * </p>
 *
 * @param content the original content to process
 * @param replacers the replacers to call, in order
 * @return the content after all replacers have been applied
 */
private String processReplacers(final String content, final Replacer... replacers) {
    StringBuilder current = new StringBuilder(content);
    for (final Replacer replacer : replacers) {
        final StringBuilder result = new StringBuilder();
        if (!replacer.expectsPlainText()) {
            // Whole-content replacer: a single call with everything.
            try {
                replacer.replace(result, current.toString());
            } catch (RuntimeException e) {
                logger.error("An error occurred in replacer: " + replacer.getClass().getName(), e);
            }
        } else {
            final Matcher textPieces = TEXT_TO_REPLACE_PATTERN.matcher(current);
            int copiedUpTo = 0;
            while (textPieces.find()) {
                final String escapedPiece = textPieces.group(1);
                final int pieceStart = textPieces.start(1);
                final int pieceEnd = textPieces.end(1);

                // Copy whatever lies between the previous piece and this one.
                result.append(current.substring(copiedUpTo, pieceStart));

                final String piece = StringEscapeUtils.unescapeHtml4(escapedPiece);
                try {
                    replacer.replace(result, piece);
                } catch (RuntimeException e) {
                    logger.error("An error occurred in replacer: " + replacer.getClass().getName(), e);
                }

                copiedUpTo = pieceEnd;
            }
            // Copy the remainder after the last piece.
            result.append(current.substring(copiedUpTo));
        }
        current = result;
    }
    return current.toString();
}

From source file:com.github.gekoh.yagen.ddl.CreateDDL.java

/**
 * Replaces the sequence name inside a create-sequence statement with the
 * name produced by the profile's naming strategy.
 * <p>
 * The name is whatever group 1 of {@code SEQ_CREATE_PATTERN} captures at
 * its first occurrence in the statement.
 *
 * @param dialect not used by this method
 * @param sqlCreate the statement to rewrite
 * @param type not used by this method
 * @return the statement with the sequence name replaced, or
 *         {@code sqlCreate} unchanged when the pattern is not found
 */
public String updateCreateSequence(Dialect dialect, String sqlCreate, Type type) {
    final Matcher sequence = SEQ_CREATE_PATTERN.matcher(sqlCreate);
    if (!sequence.find()) {
        return sqlCreate;
    }

    // prefix + renamed sequence + suffix
    return new StringBuilder()
            .append(sqlCreate.substring(0, sequence.start(1)))
            .append(getProfile().getNamingStrategy().sequenceName(sequence.group(1)))
            .append(sqlCreate.substring(sequence.end(1)))
            .toString();
}

From source file:com.github.gekoh.yagen.ddl.CreateDDL.java

/**
 * Derives the create statement of an i18n detail table from an existing
 * create-table statement and registers it with the profile.
 * <p>
 * When {@code sqlCreate} matches {@code TBL_PATTERN}, the table name is
 * replaced by {@code i18nTblName}, the column definitions found by
 * {@code COL_PATTERN} are copied except for the columns named
 * {@code I18N_COLUMN_IS_PERSISTENT} and {@code I18N_COLUMN_COMPOSITE_ID},
 * and the primary key column list is replaced by
 * {@code i18nFKColName + ", language_cd"}. When it does not match, the
 * resulting statement is empty.
 *
 * @param dialect the dialect passed on to the table name check
 * @param sqlCreate the create-table statement to derive from
 * @param i18nTblName the name of the detail table
 * @param i18nFKColName the column that leads the new primary key column list
 * @return the derived create statement, which is also handed to
 *         {@code getProfile().duplex(...)}
 */
private String getI18NDetailTableCreateString(Dialect dialect, String sqlCreate, String i18nTblName,
        String i18nFKColName) {
    StringBuilder sql = new StringBuilder();
    Matcher matcher = TBL_PATTERN.matcher(sqlCreate);
    checkTableName(dialect, i18nTblName);

    if (matcher.matches()) {
        // Everything in front of the table name, then the new table name.
        sql.append(sqlCreate.substring(0, matcher.start(TBL_PATTERN_IDX_TBLNAME))).append(i18nTblName);

        Matcher colMatcher = COL_PATTERN.matcher(sqlCreate);

        // Copy from the end of the old table name through the first '(' after it.
        int idx = sqlCreate.indexOf('(', matcher.end(TBL_PATTERN_IDX_TBLNAME)) + 1;
        sql.append(sqlCreate.substring(matcher.end(TBL_PATTERN_IDX_TBLNAME), idx));

        StringBuilder colDef = new StringBuilder();
        while (colMatcher.find(idx)) {
            String colName = TableConfig.getIdentifierForReference(colMatcher.group(COL_PATTERN_IDX_COLNAME));
            // Locale.ROOT keeps the comparison independent of the default
            // locale (e.g. Turkish lower-cases 'I' to a dotless 'i').
            String lowerColName = colName.toLowerCase(java.util.Locale.ROOT);
            if (!lowerColName.equals(I18N_COLUMN_IS_PERSISTENT)
                    && !lowerColName.equals(I18N_COLUMN_COMPOSITE_ID)) {
                colDef.append(" ").append(
                        sqlCreate.substring(colMatcher.start(COL_PATTERN_IDX_COLNAME), colMatcher.end()));
            }
            idx = colMatcher.end();
        }
        // Drop the leading separator; nothing to append when no column was kept.
        if (colDef.length() > 0) {
            sql.append(colDef.substring(1));
        }
        sql.append(sqlCreate.substring(idx));

        // Re-parse the rewritten statement to swap the primary key column list.
        String tmpSql = sql.toString();
        matcher = TBL_PATTERN.matcher(tmpSql);
        if (matcher.matches()) {
            sql = new StringBuilder(tmpSql.substring(0, matcher.start(TBL_PATTERN_IDX_PK_COLLIST)));
            sql.append(i18nFKColName).append(", language_cd")
                    .append(tmpSql.substring(matcher.end(TBL_PATTERN_IDX_PK_COLLIST)));
        }
    }

    getProfile().duplex(ObjectType.TABLE, i18nTblName, sql.toString());

    return sql.toString();
}