Example usage for java.util.regex Matcher groupCount

List of usage examples for java.util.regex Matcher groupCount

Introduction

On this page you can find usage examples for the groupCount() method of java.util.regex.Matcher.

Prototype

public int groupCount() 

Source Link

Document

Returns the number of capturing groups in this matcher's pattern.

Usage

From source file:com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java

/**
 * Extracts the trailing component of a {@code gg://<first>:<second>} uri.
 *
 * <p>The whole uri must match {@code gg://(.*):(.*)}. Because the first group is
 * greedy, the value returned is the text that follows the last {@code ':'}.
 *
 * @param uri the uri to parse; must not be {@code null}
 * @return the text captured by the second group of the pattern
 * @throws DatabusException if {@code uri} is {@code null} or does not match the expected format
 */
public static String uriToXmlPrefix(String uri) throws DatabusException {
    if (null == uri) {
        throw new DatabusException("uri passed is null and not valid");
    }

    final Matcher uriMatcher = Pattern.compile("gg://(.*):(.*)").matcher(uri);
    final boolean wellFormed = uriMatcher.matches() && uriMatcher.groupCount() == 2;
    if (wellFormed) {
        return uriMatcher.group(2);
    }

    throw new DatabusException("Expected uri format for gg path not found");
}

From source file:com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java

/**
 * Converts a string that starts with a {@code dddd-dd-dd} date into a {@code long} time value.
 *
 * <p>Only the leading date portion is used; anything after it on the same line is ignored.
 * The date portion is handed to {@code Date.valueOf(String)} and the result's
 * {@code getTime()} is returned (presumably {@code java.sql.Date} -- confirm against the
 * file's imports).
 *
 * @param value the text to parse
 * @return the value of {@code getTime()} for the parsed date
 * @throws DatabusException if {@code value} does not start with a {@code dddd-dd-dd} date
 */
public static long ggDateStringToLong(String value) throws DatabusException {
    final Matcher dateMatcher = Pattern.compile("(\\d{4}-\\d{2}-\\d{2}).*").matcher(value);

    final boolean recognised = dateMatcher.matches() && dateMatcher.groupCount() == 1;
    if (!recognised) {
        throw new DatabusException("The date format is not as expected, cannot proceed!");
    }

    return Date.valueOf(dateMatcher.group(1)).getTime();
}

From source file:de.rnd7.libtvdb.util.EpisodeUtil.java

/**
 * Derives season/episode information from a name.
 *
 * <p>The name is lower-cased and passed through {@code filter}. Each entry of
 * {@code PATTERN} is then tried in order; the first pattern that finds anything
 * wins, and every occurrence it finds contributes to the result. A pattern with
 * exactly three capturing groups yields two episodes per occurrence
 * (season + group 2, season + group 3); any other pattern yields one episode
 * (season + group 2).
 *
 * <p>If nothing was produced, a fallback treats two consecutive digits anywhere in the
 * filtered name as an episode number of season 1.
 *
 * @param name the raw name to analyse
 * @return the episodes found; empty if the name could not be interpreted
 * @throws IOException declared by the signature (presumably raised by {@code filter} -- confirm)
 */
private static List<EpisodeInfo> parseNameInternal(final String name) throws IOException {
    final String filtered = filter(name.toLowerCase());
    final List<EpisodeInfo> episodes = new ArrayList<EpisodeInfo>();

    for (final Pattern candidate : PATTERN) {
        final Matcher m = candidate.matcher(filtered);
        if (!m.find()) {
            // This pattern found nothing; try the next one.
            continue;
        }

        do {
            final int season = Integer.parseInt(m.group(1));
            final int firstEpisode = Integer.parseInt(m.group(2));

            if (m.groupCount() == 3) {
                // Three groups: the occurrence names two episodes of the same season.
                final int secondEpisode = Integer.parseInt(m.group(3));
                episodes.add(new EpisodeInfo(season, firstEpisode));
                episodes.add(new EpisodeInfo(season, secondEpisode));
            } else {
                episodes.add(new EpisodeInfo(season, firstEpisode));
            }
        } while (m.find());

        // The first pattern with at least one occurrence ends the search.
        break;
    }

    if (!episodes.isEmpty()) {
        return episodes;
    }

    // Fallback for single season series:
    final Matcher fallback = Pattern.compile(".*(\\d\\d).*", Pattern.CASE_INSENSITIVE).matcher(filtered);
    if (fallback.matches()) {
        episodes.add(new EpisodeInfo(1, Integer.parseInt(fallback.group(1))));
    }

    return episodes;
}

From source file:com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java

/**
 * Converts a timestamp string of the form {@code yyyy-MM-dd:HH:mm:ss.f} (where
 * {@code f} is zero to nine fractional-second digits) into epoch milliseconds.
 * The date and time fields are interpreted in UTC.
 *
 * <p>The fraction is truncated to millisecond precision: {@code .123456} contributes
 * 123 ms, {@code .5} contributes 500 ms and an empty fraction contributes 0 ms.
 *
 * @param value the timestamp text to parse
 * @return the number of milliseconds since the epoch
 * @throws DatabusException if {@code value} is {@code null} or does not match the expected format
 */
public static long ggTimeStampStringToMilliSeconds(String value) throws DatabusException {
    if (value == null) {
        // Report a null input the same way as any other malformed timestamp.
        throw new DatabusException("The timestamp format is not as expected, cannot proceed!");
    }

    Pattern _pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2}):(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{0,9})");
    Matcher matcher = _pattern.matcher(value);
    if (!matcher.matches() || matcher.groupCount() != 7) {
        throw new DatabusException("The timestamp format is not as expected, cannot proceed!");
    }

    // The pattern allows an empty fraction, and a short fraction such as "5" means
    // 0.5 s. Right-padding with zeros before cutting to three digits handles both:
    // "" -> "000", "5" -> "500", "123456" -> "123".
    String fraction = matcher.group(7);
    int milliSeconds = Integer.parseInt((fraction + "000").substring(0, 3));

    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));

    // Calendar months are zero-based, hence the -1 on the month field.
    calendar.set(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2)) - 1,
            Integer.parseInt(matcher.group(3)), Integer.parseInt(matcher.group(4)),
            Integer.parseInt(matcher.group(5)), Integer.parseInt(matcher.group(6)));

    // getInstance() starts from the current time, so the millisecond field must be
    // overwritten explicitly; otherwise it would keep the current clock's value.
    calendar.set(Calendar.MILLISECOND, milliSeconds);
    return calendar.getTimeInMillis();
}

From source file:com.technophobia.substeps.model.Arguments.java

/**
 * Extracts the capturing-group values of the first occurrence of {@code patternString}
 * in {@code sourceString} and converts each one to an argument object.
 *
 * <p>Group {@code i} (1-based) is converted using {@code parameterTypes[i - 1]} and
 * {@code converterTypes[i - 1]} after being passed through {@code substituteValues}.
 * Groups that did not participate in the match are skipped, but still consume their
 * slot in the two type arrays.
 *
 * @param patternString  the regular expression whose groups identify the arguments
 * @param sourceString   the text to search
 * @param parameterTypes the target type for each capturing group, by position
 * @param converterTypes the converter class for each capturing group, by position
 * @param cfg            configuration handed to {@code substituteValues}
 * @return the converted arguments, or {@code null} if the pattern was not found or no
 *         group captured a value
 */
public static List<Object> getArgs(final String patternString, final String sourceString,
        final Class<?>[] parameterTypes, final Class<? extends Converter<?>>[] converterTypes, Config cfg) {

    log.debug("Arguments getArgs List<Object> with pattern: " + patternString + " and sourceStr: "
            + sourceString);

    final Matcher matcher = Pattern.compile(patternString).matcher(sourceString);
    final int totalGroups = matcher.groupCount();

    if (!matcher.find()) {
        return null;
    }

    // Created lazily so that a match with no captured values still yields null.
    List<Object> converted = null;

    for (int group = 1, typeIdx = 0; group <= totalGroups; group++, typeIdx++) {
        final String captured = matcher.group(group);
        if (captured == null) {
            continue;
        }

        if (converted == null) {
            converted = new ArrayList<>();
        }
        converted.add(getObjectArg(substituteValues(captured, cfg), parameterTypes[typeIdx],
                converterTypes[typeIdx]));
    }

    return converted;
}

From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java

/**
 * Extracts the HTML headers (h1-h6 tags) from an HTML page.
 *
 * <p>The returned map always contains the keys {@code "H1"} to {@code "H6"}. For every
 * occurrence of {@code HX_PATTERN}, group 1 (upper-cased) selects the key and group 2,
 * passed through {@code clear}, is appended to that key's list.
 *
 * @param html the HTML text to scan
 * @return a map from header tag name to the header contents found for it
 */
public static Map<String, List<String>> extractHTMLheaders(String html) {
    Map<String, List<String>> headersByTag = new HashMap<>();
    int level = 1;
    while (level <= 6) {
        headersByTag.put("H" + level, new ArrayList<>());
        ++level;
    }

    Matcher headerMatcher = HX_PATTERN.matcher(html);
    while (headerMatcher.find()) {
        if (headerMatcher.groupCount() != 2) {
            continue;
        }

        String key = headerMatcher.group(1).toUpperCase(Locale.ENGLISH);
        List<String> bucket = headersByTag.get(key);
        bucket.add(clear(headerMatcher.group(2)));
    }

    return headersByTag;
}

From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java

/**
 * Extracts the meta tags from an HTML page and returns them in a map.
 *
 * <p>For every occurrence of {@code METATAG_PATTERN}, group 1 becomes the key and
 * group 2 the value, both passed through {@code clear}. A later occurrence with the
 * same key replaces the earlier value.
 *
 * @param html the HTML text to scan
 * @return a map from meta tag name to meta tag content
 */
public static Map<String, String> extractMetatags(String html) {
    Map<String, String> tags = new HashMap<>();

    Matcher metaMatcher = METATAG_PATTERN.matcher(html);
    while (metaMatcher.find()) {
        if (metaMatcher.groupCount() != 2) {
            continue;
        }

        String rawName = metaMatcher.group(1);
        String rawContent = metaMatcher.group(2);
        tags.put(clear(rawName), clear(rawContent));
    }

    return tags;
}

From source file:net.ripe.ipresource.Ipv6Address.java

/**
 * Converts a fully expanded IPv6 string to a BigInteger.
 *
 * <p>The input must contain eight colon-separated groups of zero to four hexadecimal
 * digits. Each group is left-padded with zeros to four digits and the eight groups are
 * concatenated into a single 32-digit hexadecimal number.
 *
 * @param ipAddressString Fully expanded address (i.e. no '::' shortcut)
 * @return Address as BigInteger
 * @throws IllegalArgumentException if no eight-group address can be found in the input
 */
private static BigInteger ipv6StringtoBigInteger(String ipAddressString) {
    Pattern p = Pattern.compile(
            "([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4})");
    Matcher m = p.matcher(ipAddressString);
    if (!m.find()) {
        // Without this check the group() calls below would fail with an
        // uninformative "No match found" IllegalStateException.
        throw new IllegalArgumentException("Not a fully expanded IPv6 address: " + ipAddressString);
    }

    // 8 groups x 4 hex digits.
    StringBuilder ipv6Number = new StringBuilder(32);
    for (int i = 1; i <= m.groupCount(); i++) {
        String part = m.group(i);
        for (int pad = part.length(); pad < 4; pad++) {
            ipv6Number.append('0');
        }
        ipv6Number.append(part);
    }

    return new BigInteger(ipv6Number.toString(), 16);
}

From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java

/**
 * Extracts the hyperlinks from an html string and returns their components 
 * in a map./*from   w w w.  j  av  a  2s. c o m*/
 * 
 * @param html
 * @return 
 */
public static Map<HyperlinkPart, List<String>> extractHyperlinks(String html) {
    Map<HyperlinkPart, List<String>> hyperlinksMap = new HashMap<>();
    hyperlinksMap.put(HyperlinkPart.HTMLTAG, new ArrayList<>());
    hyperlinksMap.put(HyperlinkPart.URL, new ArrayList<>());
    hyperlinksMap.put(HyperlinkPart.ANCHORTEXT, new ArrayList<>());

    Matcher m = HYPERLINK_PATTERN.matcher(html);
    while (m.find()) {
        if (m.groupCount() == 2) {
            String tag = m.group(0);
            String url = m.group(1);
            String anchortext = m.group(2);
            hyperlinksMap.get(HyperlinkPart.HTMLTAG).add(tag);
            hyperlinksMap.get(HyperlinkPart.URL).add(url);
            hyperlinksMap.get(HyperlinkPart.ANCHORTEXT).add(anchortext);
        }
    }
    return hyperlinksMap;
}

From source file:org.apache.falcon.util.DateUtil.java

/**
 * {@link java.util.TimeZone#getTimeZone(String)} takes the timezone ID as an argument; for invalid IDs
 * it returns the <code>GMT</code> TimeZone.  A timezone ID formatted like <code>GMT-####</code> is not a valid ID,
 * however, it will actually map this to the <code>GMT-##:##</code> TimeZone, instead of returning the
 * <code>GMT</code> TimeZone.  We check (later) check that a timezone ID is valid by calling
 * {@link java.util.TimeZone#getTimeZone(String)} and seeing if the returned
 * TimeZone ID is equal to the original; because we want to allow <code>GMT-####</code>, while still
 * disallowing actual invalid IDs, we have to manually replace <code>GMT-####</code>
 * with <code>GMT-##:##</code> first.
 *
 * @param tzId The timezone ID/*from  www.  java2  s.c  o m*/
 * @return If tzId matches <code>GMT-####</code>, then we return <code>GMT-##:##</code>; otherwise,
 * we return tzId unaltered
 */
private static String handleGMTOffsetTZNames(String tzId) {
    Matcher m = GMT_OFFSET_COLON_PATTERN.matcher(tzId);
    if (m.matches() && m.groupCount() == 3) {
        tzId = "GMT" + m.group(1) + m.group(2) + ":" + m.group(3);
    }
    return tzId;
}