Example usage for java.util.regex.MatchResult.end()

List of usage examples for java.util.regex.MatchResult.end()

Introduction

On this page you can find example usages of java.util.regex.MatchResult.end().

Prototype

public int end();

Source Link

Document

Returns the offset after the last character matched.

Usage

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

/**
 * Tallies hits for the tokens that lie underneath the mask characters of a
 * mutant's de-identified string.
 *
 * <p>Every run of one or more {@code 'X'} characters in
 * {@code mutant.getDeidentifiedString()} is located first. For each run, the
 * same character range is cut out of {@code mutant.getFinalText()}, split into
 * tokens with a default {@link StringTokenizer}, and handed to
 * {@code getHitCount}, which returns the updated running total.
 *
 * @param mutant the mutant whose de-identified string and final text are compared
 * @return the running total produced by the last {@code getHitCount} call, or
 *         {@code 0} when the de-identified string contains no mask run
 */
private int getTruePositiveTokenCount(Mutant mutant) {
    // Phase 1: remember where every masked span sits.
    final List<MatchResult> maskedSpans = new ArrayList<>();
    final Matcher maskMatcher = Pattern.compile("X+").matcher(mutant.getDeidentifiedString());
    while (maskMatcher.find()) {
        maskedSpans.add(maskMatcher.toMatchResult());
    }

    // Phase 2: tokenize the final text under each span and accumulate the hits.
    int total = 0;
    for (MatchResult span : maskedSpans) {
        final String coveredText = mutant.getFinalText().substring(span.start(), span.end());
        final ArrayList<String> coveredTokens = new ArrayList<>();
        for (StringTokenizer words = new StringTokenizer(coveredText); words.hasMoreTokens();) {
            coveredTokens.add(words.nextToken());
        }
        total = getHitCount(mutant, total, coveredTokens);
    }
    return total;
}

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

/**
 * Counts the masked tokens that are not among the mutant's output tokens.
 *
 * <p>Every run of one or more {@code 'X'} characters in
 * {@code mutant.getDeidentifiedString()} marks a masked span. The same
 * character range of {@code mutant.getFinalText()} is split into tokens with a
 * default {@link StringTokenizer}. A token is counted when it is neither empty
 * nor a lone {@code "-"} and is not equal to any element of
 * {@code mutant.getOutputTokens()}.
 *
 * @param mutant the mutant whose de-identified string, final text and output
 *               tokens are inspected
 * @return the number of masked tokens that match no output token
 */
private int getFalsePositiveTokenCount(Mutant mutant) {
    int count = 0;
    Pattern mask = Pattern.compile("X+");
    List<MatchResult> results = new ArrayList<>();
    Matcher matcher = mask.matcher(mutant.getDeidentifiedString());
    while (matcher.find()) {
        results.add(matcher.toMatchResult());
    }
    for (MatchResult result : results) {
        StringTokenizer tokenizer = new StringTokenizer(
                mutant.getFinalText().substring(result.start(), result.end()));
        while (tokenizer.hasMoreTokens()) {
            String hit = tokenizer.nextToken();
            // Separators are never counted, so skip them before the token scan.
            if (hit.isEmpty() || hit.equals("-")) {
                continue;
            }
            boolean isAnIdentifier = false;
            for (String token : mutant.getOutputTokens()) {
                // A whole-string match against a quoted pattern is plain equality;
                // equals() avoids compiling a regex for every hit/token pair.
                if (hit.equals(token)) {
                    isAnIdentifier = true;
                    break;
                }
            }
            if (!isAnIdentifier) {
                count++;
            }
        }
    }
    return count;
}

From source file:org.springframework.social.twitter.api.impl.TweetDeserializer.java

/**
 * Finds every {@code $}-prefixed run of ASCII letters in {@code text} and
 * appends one {@code TickerSymbolEntity} per occurrence to
 * {@code entities.getTickerSymbols()}.
 *
 * <p>Each entity receives the symbol without its leading {@code '$'}, a
 * Twitter search URL for that symbol, and a two-element index array holding
 * the match start offset and the offset just after the match (both include
 * the {@code '$'}).
 *
 * @param text     the text to scan
 * @param entities the entity container whose ticker-symbol list is appended to
 */
private void extractTickerSymbolEntitiesFromText(String text, Entities entities) {
    final Matcher symbolMatcher = Pattern.compile("\\$[A-Za-z]+").matcher(text);
    while (symbolMatcher.find()) {
        final MatchResult hit = symbolMatcher.toMatchResult();
        // Strip the leading '$' so only the bare symbol remains.
        final String symbol = hit.group().substring(1);
        final String searchUrl = "https://twitter.com/search?q=%24" + symbol + "&src=ctag";
        final int[] indices = { hit.start(), hit.end() };
        entities.getTickerSymbols().add(new TickerSymbolEntity(symbol, searchUrl, indices));
    }
}

From source file:net.osten.watermap.convert.AZTReport.java

/**
 * Parses one data line of the report into a {@code WaterReport}.
 *
 * <p>The line is carved up by character offsets, left to right: the mileage
 * decimals, the name (up to the end of the type match), the historic
 * reliability, the free-text report, and finally the dates found towards the
 * right-hand side of the line.
 *
 * <p>If the post date cannot be parsed, the {@code ParseException} is logged
 * and the partially filled report is returned; in that case state, source and
 * URL have not been set.
 *
 * @param line the raw text line to parse
 * @return the populated report, or {@code null} when the mileages, the type or
 *         the historic reliability cannot be found in the line
 */
private WaterReport parseDataLine(String line) {
    WaterReport result = new WaterReport();

    try {
        // Example line:
        // 8.3 8.3 Tub Spring (aka Bathtub Spring) spring 3 full tub; good trickle3/28/15 3/28/15 Bird Food 4/5/15

        // Mileages = first two decimals
        MatchResult decimalsMatch = RegexUtils.matchFirstOccurance(line, decimalPattern);
        if (decimalsMatch == null) {
            log.fine("Mileages not found");
            return null;
        }
        // Offset just past the mileage match; the name starts here.
        int decimalsEnd = decimalsMatch.end();

        // Type = spring | creek | spring fed | windmill | store | dirt tank | pipe | Town | etc..
        MatchResult typeMatch = RegexUtils.matchFirstOccurance(line, typesPattern);
        if (typeMatch == null) {
            log.fine("Type not found");
            return null;
        }
        log.finer("type=" + typeMatch.group());
        int typeEnd = typeMatch.end();

        // Name = text from second decimal number to type (spring,creek,etc.)
        // The substring runs up to typeEnd, so the matched type text is part of the name.
        log.finer("decimalsEnd=" + decimalsEnd + " typeEnd=" + typeEnd);
        String name = line.substring(decimalsEnd, typeEnd);
        result.setName(name.trim());

        // Historic Reliability = int after Type (can be "1 to 2" or "0-2")
        // Patterns are tried in the order 3, 2, 1; the first one that matches wins.
        // NOTE(review): presumably the third argument is the offset to start
        // searching from - confirm against RegexUtils.
        MatchResult histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern3, typeEnd);
        if (histRelMatch == null) {
            histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern2, typeEnd);
            if (histRelMatch == null) {
                histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern1, typeEnd);
                if (histRelMatch == null) {
                    log.fine("Historical Reliability not found");
                    return null;
                }
            }
        }
        log.finer("histRel=" + histRelMatch.group());
        String historicReliability = mapHistoricReliability(histRelMatch.group().trim());
        int histRelEnd = histRelMatch.end();

        // Report Date = second date from right
        // -1 marks "not found" for both offsets.
        int reportDateEnd = -1;
        int reportDateStart = -1;
        List<MatchResult> dates = RegexUtils.matchOccurences(line, datePattern);
        if (dates.size() >= 2) {
            reportDateEnd = dates.get(dates.size() - 2).end();
            reportDateStart = dates.get(dates.size() - 2).start();
        } else {
            log.fine("Only found " + dates.size() + " dates");
            // Fewer than two dates: use the last character index of the line, or
            // histRelEnd if that is larger, as the end of the report text.
            // reportDateEnd stays -1, which disables the "reported by" step below.
            reportDateStart = Math.max(line.length() - 1, histRelEnd);
        }

        // Report = Historic Reliability to Report Date
        log.finer("histRelEnd=" + histRelEnd + " reportDateStart=" + reportDateStart);
        if (histRelEnd >= 0 && reportDateStart >= 0 && reportDateStart >= histRelEnd) {
            String report = line.substring(histRelEnd, reportDateStart);
            result.setDescription(report.trim() + "<br />Historical Reliability:" + historicReliability);
        } else {
            log.fine("cannot find historic reliability");
        }

        // Post Date = first date from right
        int postDateStart = -1;
        MatchResult postDate = RegexUtils.matchLastOccurence(line, datePattern);
        if (postDate == null) {
            log.fine("Post Date not found");
        } else {
            // parse() is the call that can raise the ParseException handled below.
            result.setLastReport(dateFormatter.parse(postDate.group()));
            postDateStart = postDate.start();
            log.finer("postDate=" + postDate.group());
        }

        // Reported By = text between Report Date and Post Date
        // The value is only logged; it is not stored on the result.
        if (postDateStart >= 0 && reportDateEnd >= 0 && postDateStart > reportDateEnd) {
            String reportedBy = line.substring(reportDateEnd, postDateStart);
            log.finer("reportedBy=" + reportedBy);
        } else {
            log.finer("cannot find reportedBy");
        }

        // The state is derived from the description set above (which is unset
        // when the report offsets were inconsistent).
        result.setState(WaterStateParser.parseState(result.getDescription()));
        result.setSource(SOURCE_TITLE);
        result.setUrl(SOURCE_URL);
    } catch (

    ParseException e)

    {
        // Best effort: log the failure and fall through to return what was parsed so far.
        log.fine("ParseException:" + e.getLocalizedMessage());
    }

    return result;

}

From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java

/**
 * /*  w  ww.  j  ava  2  s .  com*/
 * 
 */
public final String multistatementTranslate(String sourceLang, String targetLang, String source,
        String delimiterRegx)
        throws AccessLimitExceededException, InvalidParameterException, LanguagePairNotUniquelyDecidedException,
        NoAccessPermissionException, NoValidEndpointsException, ProcessFailedException, ServerBusyException,
        ServiceNotActiveException, ServiceNotFoundException, UnsupportedLanguagePairException {
    checkStartupException();
    if (StringUtils.isBlank(delimiterRegx)) {
        throw new InvalidParameterException("delimiterRegx", "is Blank.");
    }
    StringBuilder sb = new StringBuilder();
    Scanner s = new Scanner(source).useDelimiter(delimiterRegx);
    int i = 0;
    while (s.hasNext()) {
        String text = s.next();
        MatchResult m = s.match();
        if (i != m.start()) {
            String tag = source.substring(i, m.start());
            sb.append(tag);
        }
        i = m.end();
        sb.append(invokeDoTranslation(sourceLang, targetLang, text));
    }
    if (source.length() != i) {
        String tag = source.substring(i);
        sb.append(tag);
    }

    return sb.toString();
}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

/**
 * Checks whether a possible match is being vetoed by a non split match. A
 * possible match is vetoed if it any nay overlap with a veto region.
 *
 * @param split the match result representing the split to be tested
 * @param vetoRegions regions where matches are not allowed. For efficiency
 * reasons, this method assumes these regions to be non overlapping and sorted
 * in ascending order./*from   w  ww  .j a va 2s.  c om*/
 * All veto regions that end before the proposed match are also discarded
 * (again for efficiency reasons). This requires the proposed matches to be
 * sent to this method in ascending order, so as to avoid malfunctions.
 * @return <tt>true</tt> iff the proposed split should be ignored
 */
private boolean veto(MatchResult split, List<int[]> vetoRegions) {
    //if no more non splits available, accept everything
    for (Iterator<int[]> vetoRegIter = vetoRegions.iterator(); vetoRegIter.hasNext();) {
        int[] aVetoRegion = vetoRegIter.next();
        if (aVetoRegion[1] - 1 < split.start()) {
            //current veto region ends before the proposed split starts
            //--> discard the veto region
            vetoRegIter.remove();
        } else if (split.end() - 1 < aVetoRegion[0]) {
            //veto region starts after the split ends
            //-> we can return false
            return false;
        } else {
            //we have overlap
            return true;
        }
    }
    //if we got this far, all veto regions are before the split
    return false;
}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

/**
 * Splits the document text into sentences and writes "Split" and sentence
 * annotations to the output annotation set.
 *
 * <p>Three patterns are run over the document text: internal splits,
 * external splits and non-splits. All non-split matches are collected up
 * front as veto regions. Internal and external split matches are then
 * consumed in the order given by {@code MatchResultComparator}; a split that
 * {@code veto} rejects is skipped. For every accepted split a "Split"
 * annotation with feature {@code kind} = {@code "internal"} or
 * {@code "external"} is added, followed by a sentence annotation when there
 * is text between the end of the previous sentence and the split:
 * <ul>
 * <li>internal split - the sentence ends at the end of the split match;</li>
 * <li>external split - the sentence ends before the split match, with
 * trailing space characters excluded.</li>
 * </ul>
 * A document whose content is empty or whitespace-only produces no
 * annotations.
 *
 * @throws ExecutionException if an annotation offset is rejected by the
 *         annotation set, or if a match can be attributed to neither matcher
 */
@Override
public void execute() throws ExecutionException {
    interrupted = false;
    int lastProgress = 0;
    fireProgressChanged(lastProgress);
    //get pointers to the annotation sets
    AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0)
            ? document.getAnnotations()
            : document.getAnnotations(outputASName);

    String docText = document.getContent().toString();

    /* If the document's content is empty or contains only whitespace,
     * we drop out right here, since there's nothing to sentence-split.     */
    if (docText.trim().length() < 1) {
        return;
    }

    Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText);
    Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText);

    Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText);
    //store all non split locations in a list of {start, end} pairs; veto()
    //removes entries from this list as the splits move past them
    List<int[]> nonSplits = new LinkedList<int[]>();
    while (nonSplitMatcher.find()) {
        nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() });
    }
    //this list holds the next pending match of each matcher (at most two entries)
    List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>();
    //initialise matching process
    MatchResult internalMatchResult = null;
    if (internalSplitMatcher.find()) {
        internalMatchResult = internalSplitMatcher.toMatchResult();
        nextSplitMatches.add(internalMatchResult);
    }
    MatchResult externalMatchResult = null;
    if (externalSplitMatcher.find()) {
        externalMatchResult = externalSplitMatcher.toMatchResult();
        nextSplitMatches.add(externalMatchResult);
    }
    MatchResultComparator comparator = new MatchResultComparator();
    int lastSentenceEnd = 0;

    while (!nextSplitMatches.isEmpty()) {
        //see which one matches first
        Collections.sort(nextSplitMatches, comparator);
        MatchResult nextMatch = nextSplitMatches.remove(0);
        //identity comparison: each pending result is the exact object stored above
        if (nextMatch == internalMatchResult) {
            //we have a new internal split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //add the split annotation
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "internal");
                    outputAS.add(Long.valueOf(nextMatch.start()), Long.valueOf(nextMatch.end()), "Split",
                            features);
                    //generate the sentence annotation
                    int endOffset = nextMatch.end();
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    while (lastSentenceEnd < endOffset
                            && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < nextMatch.start()) {
                        outputAS.add(Long.valueOf(lastSentenceEnd), Long.valueOf(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = endOffset;
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step
            if (internalSplitMatcher.find()) {
                internalMatchResult = internalSplitMatcher.toMatchResult();
                nextSplitMatches.add(internalMatchResult);
            } else {
                internalMatchResult = null;
            }
        } else if (nextMatch == externalMatchResult) {
            //we have a new external split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //generate the split
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "external");
                    outputAS.add(Long.valueOf(nextMatch.start()), Long.valueOf(nextMatch.end()), "Split",
                            features);
                    //generate the sentence annotation
                    //find the last non space character, going backward from
                    //where the external split starts
                    //NOTE(review): this branch tests isSpaceChar while the internal
                    //branch tests isWhitespace - confirm the difference is intended
                    int endOffset = nextMatch.start();
                    while (endOffset > lastSentenceEnd
                            && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) {
                        endOffset--;
                    }
                    //find the first non space character starting from where the
                    //last sentence ended
                    while (lastSentenceEnd < endOffset
                            && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < endOffset) {
                        outputAS.add(Long.valueOf(lastSentenceEnd), Long.valueOf(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = nextMatch.end();
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step
            if (externalSplitMatcher.find()) {
                externalMatchResult = externalSplitMatcher.toMatchResult();
                nextSplitMatches.add(externalMatchResult);
            } else {
                externalMatchResult = null;
            }
        } else {
            //malfunction
            throw new ExecutionException("Invalid state - cannot identify match!");
        }
        //report progress; computed in long so that 100 * offset cannot overflow
        //on very large documents (docText is non-empty here, see the early return)
        int newProgress = (int) (100L * lastSentenceEnd / docText.length());
        if (newProgress - lastProgress > 20) {
            lastProgress = newProgress;
            fireProgressChanged(lastProgress);
        }
    } //while(!nextSplitMatches.isEmpty()){
    fireProcessFinished();
}

From source file:magicware.scm.redmine.tools.IssueSyncApp.java

/**
 * Builds issue JSON documents from spreadsheet rows and submits them through
 * {@code redmineClient}.
 *
 * <p>The JSON template named by the sync item is read and scanned once for
 * placeholders matching {@code Constants.ISSUE_FIELD_VALUE_EXP}. The sheet is
 * then walked from its last row backwards. For each non-null row the key cell
 * is read; scanning stops at the first blank key or at the first key for which
 * {@code redmineClient.queryIssue} returns a non-zero value. For every other
 * row a copy of the template is produced with each placeholder replaced by the
 * referenced cell's content. The collected issues are finally created in the
 * reverse of the order they were collected in.
 *
 * @param syncItem describes the template, the workbook file, the sheet and the
 *                 key row/column to use
 * @throws IOException            propagated from reading the template or workbook
 * @throws InvalidFormatException propagated from opening the workbook
 */
public void execute(SyncItem syncItem) throws IOException, InvalidFormatException {

    FileInputStream in = null;

    try {

        // Read the issue JSON template.
        String issueTemplate = FileUtils.readFileAsString(syncItem.getJsonTemplate());

        // Find every field placeholder in the template.
        Matcher m = Pattern.compile(Constants.ISSUE_FIELD_VALUE_EXP).matcher(issueTemplate);

        List<MatchResult> mrList = new ArrayList<MatchResult>();

        while (m.find()) {
            MatchResult mr = m.toMatchResult();
            mrList.add(mr);
        }

        // Open the workbook file.
        in = new FileInputStream(syncItem.getFilePath());
        Workbook wb = WorkbookFactory.create(in);

        FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();

        Sheet sheet = wb.getSheet(syncItem.getSheetName());
        Row row = null;
        Cell cell = null;

        List<String> issues = new ArrayList<String>();

        // Walk the rows from the last one back to the first key row
        // (getKeyRowBeginIdx() is treated as 1-based; values <= 0 mean row 0).
        for (int i = sheet.getLastRowNum(); i >= (syncItem.getKeyRowBeginIdx() > 0
                ? (syncItem.getKeyRowBeginIdx() - 1)
                : 0); i--) {
            // Fetch the current row; missing rows are skipped.
            row = sheet.getRow(i);

            if (row != null) {

                // The key column index is converted from 1-based to 0-based.
                String keyNo = ExcelUtils.getCellContent(row.getCell(syncItem.getKeyColumnIdx() - 1),
                        evaluator);

                // A blank key ends the scan.
                if (StringUtils.isBlank(keyNo)) {
                    break;
                }

                // Only build an issue when the lookup returns 0.
                // NOTE(review): presumably queryIssue returns the number of existing
                // issues with this key - confirm against the client.
                if (redmineClient.queryIssue(syncItem.getProjectId(), syncItem.getKeyFiledId(), keyNo) == 0) {
                    StringBuilder newIssue = new StringBuilder();
                    // Offset in the template just past the previous placeholder.
                    int eolIdx = 0;
                    for (MatchResult matchResult : mrList) {

                        // Copy the literal template text that precedes this placeholder.
                        newIssue.append(issueTemplate.substring(eolIdx, matchResult.start()));

                        // Group 1 holds a 1-based column number.
                        int cellIndex = Integer.valueOf(matchResult.group(1)) - 1;
                        cell = row.getCell(cellIndex);
                        String cellvalue = ExcelUtils.getCellContent(cell, evaluator);

                        // Group 3, when present, is a JSON map used to translate the
                        // cell value; unmapped or empty values take the "default" entry.
                        String valueMapStr = matchResult.group(3);
                        Map<String, String> valueMap = null;
                        if (valueMapStr != null) {
                            valueMap = JSON.decode(valueMapStr);
                            if (StringUtils.isNotEmpty(cellvalue) && valueMap.containsKey(cellvalue)) {
                                cellvalue = valueMap.get(cellvalue);
                            } else {
                                cellvalue = valueMap.get("default");
                            }
                        }

                        // Empty values contribute nothing; others are escaped before insertion.
                        if (StringUtils.isNotEmpty(cellvalue)) {
                            cellvalue = StringEscapeUtils.escapeJavaScript(cellvalue);
                            newIssue.append(cellvalue);
                        }
                        eolIdx = matchResult.end();
                    }
                    // Copy the template text after the last placeholder.
                    newIssue.append(issueTemplate.substring(eolIdx));
                    issues.add(newIssue.toString());
                } else {
                    // The lookup returned non-zero: stop scanning further rows.
                    break;
                }
            }
        }

        // Issues were collected bottom-up, so iterate backwards to submit them
        // in ascending row order.
        for (int i = issues.size() - 1; i >= 0; i--) {
            // Decoded only so the issue can be logged in encoded form.
            Map<String, Issue> issueMap = JSON.decode(issues.get(i));
            log.debug("create new issue >>>");
            log.debug(JSON.encode(issueMap, true));
            redmineClient.createNewIssue(issues.get(i));
        }

    } finally {
        // Always release the workbook file handle.
        if (in != null) {
            in.close();
            in = null;
        }
    }
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

/**
 * Substitute with i18n properties/* w  w w  .j  ava  2 s  .  c  om*/
 *
 * @param displayText
 * @return
 */
public String formatDisplayStringWithI18n(String displayText) {

    if (StringUtils.trimToNull(displayText) == null)
        return displayText;
    try {
        String formatted = displayText;

        Matcher m = indexFieldPatternMatcher.matcher(displayText);
        int currentPos = 0;
        while (m.find(currentPos)) {
            String matchedIndexTerm = m.group(0).replaceAll(":", "");
            MatchResult mr = m.toMatchResult();
            //if the matched term represents a layer lookup the title in the layers service
            Matcher lm = layersPattern.matcher(matchedIndexTerm);
            String i18n = "";
            if (lm.matches()) {
                i18n = layersService.getName(matchedIndexTerm);
                if (i18n == null) {
                    i18n = matchedIndexTerm;
                }
            } else {
                i18n = messageSource.getMessage("facet." + matchedIndexTerm, null, matchedIndexTerm, null);
            }
            //System.out.println("i18n for " + matchedIndexTerm + " = " + i18n);
            if (!matchedIndexTerm.equals(i18n)) {

                int nextWhitespace = displayText.substring(mr.end()).indexOf(" ");
                String extractedValue = null;
                if (nextWhitespace > 0) {
                    extractedValue = displayText.substring(mr.end(), mr.end() + nextWhitespace);
                } else {
                    //reached the end of the query
                    extractedValue = displayText.substring(mr.end());
                }

                String formattedExtractedValue = SearchUtils.stripEscapedQuotes(extractedValue);

                String i18nForValue = messageSource.getMessage(matchedIndexTerm + "." + formattedExtractedValue,
                        null, "", null);
                if (i18nForValue.length() == 0)
                    i18nForValue = messageSource.getMessage(formattedExtractedValue, null, "", null);

                if (i18nForValue.length() > 0) {
                    formatted = formatted.replaceAll(matchedIndexTerm + ":" + extractedValue,
                            i18n + ":" + i18nForValue);
                } else {
                    //just replace the matched index term
                    formatted = formatted.replaceAll(matchedIndexTerm, i18n);
                }
            }
            currentPos = mr.end();
        }
        return formatted;

    } catch (Exception e) {
        logger.debug(e.getMessage(), e);
        return displayText;
    }
}