Example usage for java.util.regex Matcher groupCount

List of usage examples for java.util.regex Matcher groupCount


On this page you can find example usages of java.util.regex.Matcher.groupCount().


public int groupCount() 

Source Link


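Returns the number of capturing groups in this matcher's pattern. Group zero, which denotes the entire pattern, is not included in this count, so the valid group indices are 0 through groupCount() inclusive.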


From source file:com.epimorphics.appbase.templates.Lib.java

 * Match a string to a regex and return a vector of the matching groups
 *///from  w w  w  . j a v a 2  s .c  om
public String[] regex(Object data, String regex) {
    Matcher m = Pattern.compile(regex).matcher(data.toString());
    if (m.matches()) {
        String[] result = new String[m.groupCount()];
        for (int i = 0; i < m.groupCount(); i++) {
            result[i] = m.group(i + 1);
        return result;
    } else {
        return null;

From source file:org.apache.ctakes.ytex.uima.annotators.SegmentRegexAnnotator.java

/**
 * Add Segment annotations to the cas. First create a list of segments. Then
 * sort the list according to segment start. For each segment that has no
 * end, set the end to the [beginning of next segment - 1], or the eof.
 */
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // NOTE(review): this excerpt has lost lines (closing braces and some
    // statements). The comments below describe only what is still visible.
    String strDocText = aJCas.getDocumentText();
    // NOTE(review): the statement guarded by this null check is not visible here.
    if (strDocText == null)
    List<Segment> segmentsAdded = new ArrayList<Segment>();
    // find all the segments, set begin and id, add to list
    for (Map.Entry<SegmentRegex, Pattern> entry : regexMap.entrySet()) {
        if (log.isDebugEnabled()) {
            log.debug("applying regex:" + entry.getKey().getRegex());
        // Scan the whole document text for every occurrence of this pattern.
        Matcher matcher = entry.getValue().matcher(strDocText);
        while (matcher.find()) {
            Segment seg = new Segment(aJCas);
            // groupCount() == 1 means the pattern declares exactly one capturing group.
            // NOTE(review): the bodies of the branches below are missing from this excerpt.
            if (entry.getKey().isLimitToRegex() && matcher.groupCount() == 1) {
            } else {
                if (entry.getKey().isLimitToRegex()) {
            if (log.isDebugEnabled()) {
                log.debug("found match: id=" + seg.getId() + ", begin=" + seg.getBegin());
    if (log.isDebugEnabled()) {
        log.debug("segmentsAdded: " + segmentsAdded.size());
    if (segmentsAdded.size() > 0) {
        // sort the segments by begin
        Collections.sort(segmentsAdded, new Comparator<Segment>() {

            // @Override
            // Ascending order of begin offset; equal offsets compare as 0.
            public int compare(Segment o1, Segment o2) {
                return o1.getBegin() < o2.getBegin() ? -1 : o1.getBegin() > o2.getBegin() ? 1 : 0;

        // set the end for each segment
        for (int i = 0; i < segmentsAdded.size(); i++) {
            Segment seg = segmentsAdded.get(i);
            // segNext is null for the last segment in the sorted list.
            Segment segNext = (i + 1) < segmentsAdded.size() ? segmentsAdded.get(i + 1) : null;
            // A non-positive end is treated as "no end assigned yet".
            if (seg.getEnd() <= 0) {
                if (segNext != null) {
                    // set end to beginning of next segment
                    seg.setEnd(segNext.getBegin() - 1);
                } else {
                    // set end to doc end
                    // NOTE(review): the assignment for this case is not visible here.
            } else {
                // segments shouldn't overlap
                if (segNext != null && segNext.getBegin() < seg.getEnd()) {
                    // Trim this segment so it stops just before the next one begins.
                    seg.setEnd(segNext.getBegin() - 1);
            if (log.isDebugEnabled()) {
                log.debug("Adding Segment: segment id=" + seg.getId() + ", begin=" + seg.getBegin() + ", end="
                        + seg.getEnd());
    // ctakes 1.3.2 - anything not in a segment will not be annotated - add
    // text outside segments to the 'default' segment
    // 'end' tracks the end offset of the most recently visited segment.
    int end = 0;
    for (Segment seg : segmentsAdded) {
        // Text lies between the previous end and this segment's begin: cover it.
        if ((seg.getBegin() - 1) > end) {
            addGapSegment(aJCas, end, seg.getBegin() - 1);
        end = seg.getEnd();
    // Cover any text that follows the last segment.
    if (end < strDocText.length()) {
        addGapSegment(aJCas, end, strDocText.length());

From source file:com.amalto.core.jobox.component.JobAware.java

public JobInfo loadJobInfo(String entityName) {
    JobInfo jobInfo = null;// w ww .  j  a v  a  2  s .c o  m
    File entity = new File(workDir + File.separator + entityName);
    if (entity.exists()) {
        // parse name and version
        String jobVersion = StringUtils.EMPTY;
        String jobName = StringUtils.EMPTY;
        Matcher m = jobVersionNamePattern.matcher(entityName);
        while (m.find()) {
            jobName = m.group(1);
            jobVersion = m.group(m.groupCount());
        jobInfo = new JobInfo(jobName, jobVersion);
        setClassPath4TISJob(entity, jobInfo);
        // get main class from command line
        guessMainClassFromCommandLine(entity, jobInfo);
        //not found then found it in context properties folder
        if (jobInfo.getMainClass() == null) {
            String propFilePath = analyzeJobParams(entity, jobInfo);
            guessMainClass(propFilePath, jobInfo);
    return jobInfo;

From source file:org.eclipse.rdf4j.repository.http.HTTPRepository.java

/**
 * Creates a repository for the given repository URL and derives the server
 * URL from it.
 *
 * <p>The URL must have the form
 * {@code <server>/<Protocol.REPOSITORIES>/<id>} with an optional trailing
 * slash; everything before {@code /<Protocol.REPOSITORIES>/} becomes the
 * server URL.
 *
 * @param repositoryURL full URL of the repository
 * @throws IllegalArgumentException when the URL does not have that form
 */
public HTTPRepository(final String repositoryURL) {
    this();
    // Try to parse the server URL from the repository URL
    Pattern urlPattern = Pattern.compile("(.*)/" + Protocol.REPOSITORIES + "/[^/]*/?");
    Matcher matcher = urlPattern.matcher(repositoryURL);

    // groupCount() reflects the pattern, not the match: it is 1 here as long as
    // Protocol.REPOSITORIES contributes no capturing groups of its own.
    if (matcher.matches() && matcher.groupCount() == 1) {
        this.serverURL = matcher.group(1);
    } else {
        throw new IllegalArgumentException("URL must be to a Sesame Repository (not just the server)");
    }
    this.repositoryURL = repositoryURL;
}

From source file:org.apache.flume.ext.interceptor.RegexExtractorInterceptor.java

public Event intercept(Event event) {
    Matcher matcher = regex.matcher(new String(event.getBody(), Charsets.UTF_8));
    Map<String, String> headers = event.getHeaders();
    if (matcher.find()) {
        for (int group = 0, count = matcher.groupCount(); group < count; group++) {
            int groupIndex = group + 1;
            if (groupIndex > serializers.size()) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Skipping group {} to {} due to missing serializer", group, count);
                }/*w  ww. ja v a  2  s.co m*/
            NameAndSerializer serializer = serializers.get(group);
            if (logger.isDebugEnabled()) {
                logger.debug("Serializing {} using {}", serializer.headerName, serializer.serializer);
            headers.put(serializer.headerName, serializer.serializer.serialize(matcher.group(groupIndex)));
    return event;

From source file:org.apache.oozie.util.XLogStreamer.java

 * Returns the creation time of the .gz archive if it is relevant to the job
 * @param fileName/*from   ww w  .j  a v a 2 s .  com*/
 * @param startTime
 * @param endTime
 * @return Modification time of .gz file after checking if it is relevant to the job
private long getGZFileCreationTime(String fileName, long startTime, long endTime) {
    // Default return value of -1 to exclude the file
    long returnVal = -1;

    // Include oozie.log as oozie.log.gz if it is accidentally GZipped
    if (fileName.equals("oozie.log.gz")) {
        LOG.warn("oozie.log has been GZipped, which is unexpected");
        // Return a value other than -1 to include the file in list
        returnVal = 0;
    } else {
        Matcher m = gzTimePattern.matcher(fileName);
        if (m.matches() && m.groupCount() == 4) {
            int year = Integer.parseInt(m.group(1));
            int month = Integer.parseInt(m.group(2));
            int day = Integer.parseInt(m.group(3));
            int hour = Integer.parseInt(m.group(4));
            int minute = 0;
            Calendar calendarEntry = Calendar.getInstance();
            calendarEntry.set(year, month - 1, day, hour, minute); // give month-1(Say, 7 for August)
            long logFileStartTime = calendarEntry.getTimeInMillis();
            long milliSecondsPerHour = 3600000;
            long logFileEndTime = logFileStartTime + milliSecondsPerHour;
            /*  To check whether the log content is there in the initial or later part of the log file or
            the log content is contained entirely within this log file or
            the entire log file contains the event log where the event spans across hours
            if ((startTime >= logFileStartTime && startTime <= logFileEndTime)
                    || (endTime >= logFileStartTime && endTime <= logFileEndTime)
                    || (startTime <= logFileStartTime && endTime >= logFileEndTime)) {
                returnVal = logFileStartTime;
        } else {
            LOG.debug("Filename " + fileName + " does not match the expected format");
            returnVal = -1;
    return returnVal;

From source file:org.apache.hadoop.hive.serde2.RegexSerDe.java

public Object deserialize(Writable blob) throws SerDeException {

    Text rowText = (Text) blob;
    Matcher m = inputPattern.matcher(rowText.toString());

    if (m.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }//from  w  w  w .  j a  v  a2 s.  co m

    // If do not match, ignore the line, return a row with all nulls.
    if (!m.matches()) {
        if (!alreadyLoggedNoMatch) {
            // Report the row if its the first time
            LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText);
            alreadyLoggedNoMatch = true;
        return null;

    // Otherwise, return the row.
    for (int c = 0; c < numColumns; c++) {
        try {
            String t = m.group(c + 1);
            TypeInfo typeInfo = columnTypes.get(c);

            // Convert the column to the correct type when needed and set in row obj
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            switch (pti.getPrimitiveCategory()) {
            case STRING:
                row.set(c, t);
            case BYTE:
                Byte b;
                b = Byte.valueOf(t);
                row.set(c, b);
            case SHORT:
                Short s;
                s = Short.valueOf(t);
                row.set(c, s);
            case INT:
                Integer i;
                i = Integer.valueOf(t);
                row.set(c, i);
            case LONG:
                Long l;
                l = Long.valueOf(t);
                row.set(c, l);
            case FLOAT:
                Float f;
                f = Float.valueOf(t);
                row.set(c, f);
            case DOUBLE:
                Double d;
                d = Double.valueOf(t);
                row.set(c, d);
            case BOOLEAN:
                Boolean bool;
                bool = Boolean.valueOf(t);
                row.set(c, bool);
            case TIMESTAMP:
                Timestamp ts;
                ts = Timestamp.valueOf(t);
                row.set(c, ts);
            case DATE:
                Date date;
                date = Date.valueOf(t);
                row.set(c, date);
            case DECIMAL:
                HiveDecimal bd = HiveDecimal.create(t);
                row.set(c, bd);
            case CHAR:
                HiveChar hc = new HiveChar(t, ((CharTypeInfo) typeInfo).getLength());
                row.set(c, hc);
            case VARCHAR:
                HiveVarchar hv = new HiveVarchar(t, ((VarcharTypeInfo) typeInfo).getLength());
                row.set(c, hv);
                throw new SerDeException("Unsupported type " + typeInfo);
        } catch (RuntimeException e) {
            if (!alreadyLoggedPartialMatch) {
                // Report the row if its the first row
                LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, "
                        + " cannot find group " + c + ": " + rowText);
                alreadyLoggedPartialMatch = true;
            row.set(c, null);
    return row;

From source file:org.berlin.crawl.parse.WebParser.java

// Splits a full URL string into scheme, host, path and query using the
// SIMPLE_LINK and SIMPLE_LINK2 patterns, then builds a BotLink from the parts.
// NOTE(review): this excerpt has lost lines (closing braces, and apparently the
// statements that use linksForProcessing, tkLink, scheme, host and query).
protected void processFullURL(final List<BotLink> linksForProcessing, final Link tkLink, final String u) {
    String scheme = "";
    String host = "";
    String path = "";
    String query = "";
    final Matcher m = SIMPLE_LINK.matcher(u);
    while (m.find()) {
        // groupCount() describes the pattern, so this checks that SIMPLE_LINK
        // declares at least two capturing groups.
        if (m.groupCount() >= 2) {
            // Group 1 is the scheme; group 2 is the rest, which is split further below.
            scheme = m.group(1).trim();
            final String tmp = m.group(2).trim();
            final Matcher m2 = SIMPLE_LINK2.matcher(tmp);
            while (m2.find()) {
                if (m2.groupCount() >= 2) {
                    host = m2.group(1).trim();
                    // At this point we should have a path
                    // Remove the 'query' section if available
                    final String tmp2 = m2.group(2).trim();
                    // Uses '> 0', so a '?' in the very first position is not treated as a separator.
                    if (tmp2.indexOf('?') > 0) {
                        final String wQuery = tmp2.substring(tmp2.indexOf('?') + 1);
                        path = tmp2.substring(0, tmp2.indexOf('?'));
                        query = wQuery;
                    } else {
                        path = tmp2;
                } // End of the if //
    } // End of the while
    // Only build a link when both a scheme and a host were extracted.
    if (scheme.length() > 0 && host.length() > 0) {
        // Create a link for for further processing //
        final BotLink link = new BotLink();
        if (path.length() > 0) {
            link.setPath("/" + path);
        } // End of the if //
        logger.info("Attempt to process and add to queue / link , link=" + link);
    } // End of the if //


From source file:org.atomserver.core.dbstore.DBBasedAtomService.java

// Management operation: takes a comma-separated list of entry queries and
// returns a text report with one "(query : outcome" fragment per query.
// NOTE(review): this excerpt has lost lines (closing braces and the tail of the
// statement inside the for-each loop below).
@ManagedOperation(description = "obliterate entries.")
public String obliterateEntries(String entriesQueries) {
    StringBuilder builder = new StringBuilder();

    String[] queries = entriesQueries.split(",");
    for (String query : queries) {
        Matcher matcher = ENTRY_ID_PATTERN.matcher(query);
        builder.append("(").append(query).append(" : ");
        if (matcher.matches()) {
            // groupCount() is fixed by the pattern, so these checks say whether the
            // pattern declares the group, not whether it took part in this match.
            EntryDescriptor descriptor = new BaseEntryDescriptor(matcher.group(1),
                    matcher.groupCount() >= 2 ? matcher.group(2) : null,
                    matcher.groupCount() >= 3 ? matcher.group(3) : null,
                    matcher.groupCount() >= 4 ? LocaleUtils.toLocale(matcher.group(4)) : null);
            List<EntryMetaData> list = entriesDAO.selectEntries(descriptor);
            // Safety guard: more than obliterateThreshold entries are only handled
            // when the query ends with "!".
            if (list.size() > obliterateThreshold && !query.endsWith("!")) {
                builder.append("would have obliterated more than ").append(obliterateThreshold)
                        .append(" entries (").append(list.size()).append(") - try ").append(query)
                        .append("! instead.");
            } else {
                for (EntryMetaData entry : list) {
                    // NOTE(review): the remainder of this statement is missing from the excerpt.
                    ((DBBasedAtomCollection) getAtomWorkspace(descriptor.getWorkspace())
                }
                builder.append("obliterated ").append(list.size()).append(" entries.");
        } else {
            builder.append("error - doesn't match workspace/collection/entryId?locale=xx_XX");
    return builder.toString();

From source file:com.mgmtp.jfunk.web.CapabilitiesProvider.java

// Builds a map from web driver type to DesiredCapabilities out of the
// configuration entries whose keys match CAPABILITIES_PREFIX_PATTERN.
// NOTE(review): this excerpt has lost lines (closing braces and apparently some
// statements); the comments below describe only what is still visible.
public Map<String, DesiredCapabilities> get() {
    Configuration config = configProvider.get();

    // driver type -> capability name -> capability entries
    Map<String, Map<String, List<JFunkCapability>>> capabilitiesMap = newHashMap();
    for (Entry<String, String> entry : config.entrySet()) {
        String key = entry.getKey();
        Matcher matcher = CAPABILITIES_PREFIX_PATTERN.matcher(key);
        if (matcher.find()) {
            // Group 1 names the driver type; when it did not take part in the
            // match the capability goes into the "global" bucket.
            String driverType = matcher.groupCount() == 1 && matcher.group(1) != null ? matcher.group(1)
                    : "global";
            // Skip the matched prefix plus one separator character.
            String capabilityString = key.substring(matcher.end() + 1);
            int lastDotIndex = capabilityString.lastIndexOf('.');
            String value = entry.getValue();

            JFunkCapability capability;
            if (lastDotIndex != -1) {
                // A numeric last segment marks a list capability; the segment is
                // stripped from the capability name.
                JFunkCapabilityType type = JFunkCapabilityType.LIST;
                try {
                    Integer.parseInt(capabilityString.substring(lastDotIndex + 1));
                    capabilityString = capabilityString.substring(0, lastDotIndex);
                } catch (NumberFormatException ex) {
                    // not a list capability
                    type = JFunkCapabilityType.STRING;
                capability = new JFunkCapability(capabilityString, value, type);
            } else {
                capability = new JFunkCapability(capabilityString, value, JFunkCapabilityType.STRING);

            // Lazily create the per-driver map and the per-name list.
            Map<String, List<JFunkCapability>> map = capabilitiesMap.get(driverType);
            if (map == null) {
                map = newHashMapWithExpectedSize(5);
                capabilitiesMap.put(driverType, map);
            List<JFunkCapability> list = map.get(capability.name);
            if (list == null) {
                list = newArrayListWithExpectedSize(1);
                map.put(capability.name, list);

    // Take the "global" bucket out of the per-driver map; it seeds every driver's capabilities.
    Map<String, List<JFunkCapability>> tmpGlobals = capabilitiesMap.remove("global");
    final Map<String, Object> globalCapabilities = tmpGlobals == null ? ImmutableMap.<String, Object>of()
            : transformCapabilities(tmpGlobals);

    final Proxy proxy = createProxyFromConfig(config);

    // transform in to map of capabilities for each webdriver type
    final Map<String, DesiredCapabilities> byDriverTypeCapabilities = transformEntries(capabilitiesMap,
            new EntryTransformer<String, Map<String, List<JFunkCapability>>, DesiredCapabilities>() {
                public DesiredCapabilities transformEntry(final String key,
                        final Map<String, List<JFunkCapability>> value) {
                    // Start from a copy of the global capabilities.
                    Map<String, Object> capabilities = newHashMap(globalCapabilities);
                    // NOTE(review): transformedCapabilities is not used in the visible lines.
                    Map<String, Object> transformedCapabilities = transformCapabilities(value);

                    DesiredCapabilities result = new DesiredCapabilities(capabilities);
                    if (proxy != null) {
                        result.setCapability(CapabilityType.PROXY, proxy);
                    return result;

    // wrap, so we get empty capabilities instead of nulls
    return new ForwardingMap<String, DesiredCapabilities>() {
        protected Map<String, DesiredCapabilities> delegate() {
            return byDriverTypeCapabilities;

        public DesiredCapabilities get(final Object key) {
            DesiredCapabilities capabilities = super.get(key);
            // Unknown driver type: hand back fresh capabilities that carry only
            // the proxy, when one is configured.
            if (capabilities == null) {
                DesiredCapabilities desiredCapabilities = new DesiredCapabilities();
                if (proxy != null) {
                    desiredCapabilities.setCapability(CapabilityType.PROXY, proxy);
                capabilities = desiredCapabilities;
            return capabilities;