Example usage for java.util.regex MatchResult start

List of usage examples for java.util.regex MatchResult start

Introduction

On this page you can find example usage for java.util.regex MatchResult start.

Prototype

public int start();

Document

Returns the start index of the match.
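
As a quick orientation before the usage examples, here is a minimal, self-contained sketch (not taken from the source files below); the sample text and pattern are illustrative only:

import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MatchResultStartDemo {
    public static void main(String[] args) {
        Matcher matcher = Pattern.compile("\\d+").matcher("order 42, item 7");
        while (matcher.find()) {
            // freeze the current match state so it can be inspected later
            MatchResult result = matcher.toMatchResult();
            // start() returns the index of the first character of this match
            System.out.println(result.group() + " starts at index " + result.start());
        }
    }
}

Running this prints "42 starts at index 6" and "7 starts at index 15".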

Usage

From source file:net.solarnetwork.util.StringMerger.java

/**
 * Merge from a String source into a StringBuilder.
 *
 * @param src
 *        the source String to substitute into
 * @param data
 *        the data object to substitute with
 * @param nullValue
 *        the value to substitute for null data
 * @param buf
 *        the StringBuilder to append the output to
 */
public static void mergeString(String src, Object data, String nullValue, StringBuilder buf) {
    Matcher matcher = MERGE_VAR_PAT.matcher(src);

    //MatchResult[] matches = MERGE_VAR_PAT.matcher(src);
    //REMatch[] matches = MERGE_VAR_RE.getAllMatches(src);
    if (!matcher.find()) {
        buf.append(src);
    } else {
        int endLastMatchIdx = 0;
        do {
            MatchResult matchResult = matcher.toMatchResult();

            // append everything from the end of the last
            // match to the start of this match
            buf.append(src.substring(endLastMatchIdx, matchResult.start()));

            // perform substitution here...
            if (data != null) {
                int s = matchResult.start(1);
                int e = matchResult.end(1);
                if ((s > -1) && (e > -1)) {
                    String varName = src.substring(s, e);
                    if (data instanceof java.util.Map<?, ?>) {
                        Object o = null;
                        int sepIdx = varName.indexOf('.');
                        if (sepIdx > 0) {
                            String varName2 = varName.substring(sepIdx + 1);
                            varName = varName.substring(0, sepIdx);
                            o = ((Map<?, ?>) data).get(varName);
                            if (o != null) {
                                try {
                                    o = PropertyUtils.getProperty(o, varName2);
                                } catch (Exception e2) {
                                    LOG.warn("Exception getting property '" + varName2 + "' out of "
                                            + o.getClass() + ": " + e2);
                                }
                            }
                        } else {
                            // simply check for key
                            o = ((Map<?, ?>) data).get(varName);
                        }
                        if (o == null || (String.class.isAssignableFrom(o.getClass())
                                && !StringUtils.hasText(o.toString()))) {
                            buf.append(nullValue);
                        } else {
                            buf.append(o);
                        }
                    } else {
                        // use reflection to get a bean property
                        try {
                            Object o = PropertyUtils.getProperty(data, varName);
                            if (o == null || (String.class.isAssignableFrom(o.getClass())
                                    && !StringUtils.hasText(o.toString()))) {
                                buf.append(nullValue);
                            } else {
                                buf.append(o);
                            }
                        } catch (Exception ex) {
                            LOG.warn("Exception getting property '" + varName + "' out of " + data.getClass()
                                    + ": " + ex);
                            buf.append(nullValue);
                        }
                    }
                }
                endLastMatchIdx = matchResult.end();
            }
        } while (matcher.find());

        if (endLastMatchIdx < src.length()) {
            buf.append(src.substring(endLastMatchIdx));
        }
    }
}
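
Since MERGE_VAR_PAT is not shown in this excerpt, the following is a hedged, self-contained sketch of the same copy-between-matches technique built on MatchResult.start() and end(); the ${name} placeholder syntax and the class and method names are hypothetical stand-ins, not StringMerger's actual API:

import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MergeSketch {
    // hypothetical placeholder syntax such as ${name}; the real MERGE_VAR_PAT is defined elsewhere in StringMerger
    private static final Pattern VAR = Pattern.compile("\\$\\{([^}]+)\\}");

    public static String merge(String src, Map<String, ?> data, String nullValue) {
        StringBuilder buf = new StringBuilder();
        Matcher matcher = VAR.matcher(src);
        int endLastMatchIdx = 0;
        while (matcher.find()) {
            MatchResult mr = matcher.toMatchResult();
            // copy the literal text between the end of the previous match and the start of this one
            buf.append(src, endLastMatchIdx, mr.start());
            // group 1 holds the variable name; look it up and fall back to nullValue
            Object value = data.get(src.substring(mr.start(1), mr.end(1)));
            buf.append(value == null ? nullValue : value);
            endLastMatchIdx = mr.end();
        }
        // copy whatever follows the last match
        buf.append(src.substring(endLastMatchIdx));
        return buf.toString();
    }

    public static void main(String[] args) {
        System.out.println(merge("Hello ${name}, you have ${count} messages.",
                Map.of("name", "Ada"), "-"));
        // prints: Hello Ada, you have - messages.
    }
}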

From source file:magicware.scm.redmine.tools.IssueSyncApp.java

public void execute(SyncItem syncItem) throws IOException, InvalidFormatException {

    FileInputStream in = null;

    try {

        // read the issue JSON template
        String issueTemplate = FileUtils.readFileAsString(syncItem.getJsonTemplate());

        // collect the field-value placeholders in the template
        Matcher m = Pattern.compile(Constants.ISSUE_FIELD_VALUE_EXP).matcher(issueTemplate);

        List<MatchResult> mrList = new ArrayList<MatchResult>();

        while (m.find()) {
            MatchResult mr = m.toMatchResult();
            mrList.add(mr);
        }

        // open the Excel workbook
        in = new FileInputStream(syncItem.getFilePath());
        Workbook wb = WorkbookFactory.create(in);

        FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();

        Sheet sheet = wb.getSheet(syncItem.getSheetName());
        Row row = null;
        Cell cell = null;

        List<String> issues = new ArrayList<String>();

        // scan rows from the last row up to the configured first key row
        for (int i = sheet.getLastRowNum(); i >= (syncItem.getKeyRowBeginIdx() > 0
                ? (syncItem.getKeyRowBeginIdx() - 1)
                : 0); i--) {
            // get the current row
            row = sheet.getRow(i);

            if (row != null) {

                String keyNo = ExcelUtils.getCellContent(row.getCell(syncItem.getKeyColumnIdx() - 1),
                        evaluator);

                // stop when the key number is blank
                if (StringUtils.isBlank(keyNo)) {
                    break;
                }

                // create the issue only if it does not already exist in Redmine
                if (redmineClient.queryIssue(syncItem.getProjectId(), syncItem.getKeyFiledId(), keyNo) == 0) {
                    StringBuilder newIssue = new StringBuilder();
                    int eolIdx = 0;
                    for (MatchResult matchResult : mrList) {

                        newIssue.append(issueTemplate.substring(eolIdx, matchResult.start()));

                        int cellIndex = Integer.valueOf(matchResult.group(1)) - 1;
                        cell = row.getCell(cellIndex);
                        String cellvalue = ExcelUtils.getCellContent(cell, evaluator);

                        // optional value mapping for the cell content
                        String valueMapStr = matchResult.group(3);
                        Map<String, String> valueMap = null;
                        if (valueMapStr != null) {
                            valueMap = JSON.decode(valueMapStr);
                            if (StringUtils.isNotEmpty(cellvalue) && valueMap.containsKey(cellvalue)) {
                                cellvalue = valueMap.get(cellvalue);
                            } else {
                                cellvalue = valueMap.get("default");
                            }
                        }

                        if (StringUtils.isNotEmpty(cellvalue)) {
                            cellvalue = StringEscapeUtils.escapeJavaScript(cellvalue);
                            newIssue.append(cellvalue);
                        }
                        eolIdx = matchResult.end();
                    }
                    newIssue.append(issueTemplate.substring(eolIdx));
                    issues.add(newIssue.toString());
                } else {
                    // issue already exists; stop processing
                    break;
                }
            }
        }

        for (int i = issues.size() - 1; i >= 0; i--) {
            Map<String, Issue> issueMap = JSON.decode(issues.get(i));
            log.debug("create new issue >>>");
            log.debug(JSON.encode(issueMap, true));
            redmineClient.createNewIssue(issues.get(i));
        }

    } finally {
        if (in != null) {
            in.close();
            in = null;
        }
    }
}

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

private int getTruePositiveTokenCount(Mutant mutant) {
    int count = 0;
    Pattern mask = Pattern.compile("X+");
    List<MatchResult> results = new ArrayList<>();
    Matcher matcher = mask.matcher(mutant.getDeidentifiedString());
    while (matcher.find()) {
        results.add(matcher.toMatchResult());
    }
    for (MatchResult result : results) {
        StringTokenizer tokenizer = new StringTokenizer(
                mutant.getFinalText().substring(result.start(), result.end()));
        ArrayList<String> arHits = new ArrayList<>();
        while (tokenizer.hasMoreTokens()) {
            arHits.add(tokenizer.nextToken());
        }
        count = getHitCount(mutant, count, arHits);
    }
    return count;
}

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

private int getFalsePositiveTokenCount(Mutant mutant) {
    int count = 0;
    Pattern mask = Pattern.compile("X+");
    List<MatchResult> results = new ArrayList<>();
    Matcher matcher = mask.matcher(mutant.getDeidentifiedString());
    while (matcher.find()) {
        results.add(matcher.toMatchResult());
    }
    for (MatchResult result : results) {
        StringTokenizer tokenizer = new StringTokenizer(
                mutant.getFinalText().substring(result.start(), result.end()));
        ArrayList<String> arHits = new ArrayList<>();
        while (tokenizer.hasMoreTokens()) {
            arHits.add(tokenizer.nextToken());
        }
        for (String hit : arHits) {
            boolean isAnIdentifier = false;
            for (String token : mutant.getOutputTokens()) {
                if (hit.matches(Pattern.quote(token))) {
                    isAnIdentifier = true;
                }
            }
            if (!isAnIdentifier && !hit.equalsIgnoreCase("") && !hit.equalsIgnoreCase("-")) {
                count++;
            }
        }
    }
    return count;
}

From source file:org.springframework.social.twitter.api.impl.TweetDeserializer.java

private void extractTickerSymbolEntitiesFromText(String text, Entities entities) {
    Pattern pattern = Pattern.compile("\\$[A-Za-z]+");
    Matcher matcher = pattern.matcher(text);
    while (matcher.find()) {
        MatchResult matchResult = matcher.toMatchResult();
        String tickerSymbol = matchResult.group().substring(1);
        String url = "https://twitter.com/search?q=%24" + tickerSymbol + "&src=ctag";
        entities.getTickerSymbols().add(new TickerSymbolEntity(tickerSymbol, url,
                new int[] { matchResult.start(), matchResult.end() }));
    }
}

From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java

public final String multistatementTranslate(String sourceLang, String targetLang, String source,
        String delimiterRegx)
        throws AccessLimitExceededException, InvalidParameterException, LanguagePairNotUniquelyDecidedException,
        NoAccessPermissionException, NoValidEndpointsException, ProcessFailedException, ServerBusyException,
        ServiceNotActiveException, ServiceNotFoundException, UnsupportedLanguagePairException {
    checkStartupException();
    if (StringUtils.isBlank(delimiterRegx)) {
        throw new InvalidParameterException("delimiterRegx", "is Blank.");
    }
    StringBuilder sb = new StringBuilder();
    Scanner s = new Scanner(source).useDelimiter(delimiterRegx);
    int i = 0;
    while (s.hasNext()) {
        String text = s.next();
        MatchResult m = s.match();
        if (i != m.start()) {
            String tag = source.substring(i, m.start());
            sb.append(tag);
        }
        i = m.end();
        sb.append(invokeDoTranslation(sourceLang, targetLang, text));
    }
    if (source.length() != i) {
        String tag = source.substring(i);
        sb.append(tag);
    }

    return sb.toString();
}

From source file:net.osten.watermap.convert.AZTReport.java

private WaterReport parseDataLine(String line) {
    WaterReport result = new WaterReport();

    try {
        // Example line:
        // 8.3 8.3 Tub Spring (aka Bathtub Spring) spring 3 full tub; good trickle3/28/15 3/28/15 Bird Food 4/5/15

        // Mileages = first two decimals
        MatchResult decimalsMatch = RegexUtils.matchFirstOccurance(line, decimalPattern);
        if (decimalsMatch == null) {
            log.fine("Mileages not found");
            return null;
        }
        int decimalsEnd = decimalsMatch.end();

        // Type = spring | creek | spring fed | windmill | store | dirt tank | pipe | Town | etc..
        MatchResult typeMatch = RegexUtils.matchFirstOccurance(line, typesPattern);
        if (typeMatch == null) {
            log.fine("Type not found");
            return null;
        }
        log.finer("type=" + typeMatch.group());
        int typeEnd = typeMatch.end();

        // Name = text from second decimal number to type (spring,creek,etc.)
        log.finer("decimalsEnd=" + decimalsEnd + " typeEnd=" + typeEnd);
        String name = line.substring(decimalsEnd, typeEnd);
        result.setName(name.trim());

        // Historic Reliability = int after Type (can be "1 to 2" or "0-2")
        MatchResult histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern3, typeEnd);
        if (histRelMatch == null) {
            histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern2, typeEnd);
            if (histRelMatch == null) {
                histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern1, typeEnd);
                if (histRelMatch == null) {
                    log.fine("Historical Reliability not found");
                    return null;
                }
            }
        }
        log.finer("histRel=" + histRelMatch.group());
        String historicReliability = mapHistoricReliability(histRelMatch.group().trim());
        int histRelEnd = histRelMatch.end();

        // Report Date = second date from right
        int reportDateEnd = -1;
        int reportDateStart = -1;
        List<MatchResult> dates = RegexUtils.matchOccurences(line, datePattern);
        if (dates.size() >= 2) {
            reportDateEnd = dates.get(dates.size() - 2).end();
            reportDateStart = dates.get(dates.size() - 2).start();
        } else {
            log.fine("Only found " + dates.size() + " dates");
            reportDateStart = Math.max(line.length() - 1, histRelEnd);
        }

        // Report = Historic Reliability to Report Date
        log.finer("histRelEnd=" + histRelEnd + " reportDateStart=" + reportDateStart);
        if (histRelEnd >= 0 && reportDateStart >= 0 && reportDateStart >= histRelEnd) {
            String report = line.substring(histRelEnd, reportDateStart);
            result.setDescription(report.trim() + "<br />Historical Reliability:" + historicReliability);
        } else {
            log.fine("cannot find historic reliability");
        }

        // Post Date = first date from right
        int postDateStart = -1;
        MatchResult postDate = RegexUtils.matchLastOccurence(line, datePattern);
        if (postDate == null) {
            log.fine("Post Date not found");
        } else {
            result.setLastReport(dateFormatter.parse(postDate.group()));
            postDateStart = postDate.start();
            log.finer("postDate=" + postDate.group());
        }

        // Reported By = text between Report Date and Post Date
        if (postDateStart >= 0 && reportDateEnd >= 0 && postDateStart > reportDateEnd) {
            String reportedBy = line.substring(reportDateEnd, postDateStart);
            log.finer("reportedBy=" + reportedBy);
        } else {
            log.finer("cannot find reportedBy");
        }

        result.setState(WaterStateParser.parseState(result.getDescription()));
        result.setSource(SOURCE_TITLE);
        result.setUrl(SOURCE_URL);
    } catch (ParseException e) {
        log.fine("ParseException:" + e.getLocalizedMessage());
    }

    return result;

}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

/**
 * Checks whether a possible match is being vetoed by a non split match. A
 * possible match is vetoed if it has any overlap with a veto region.
 *
 * @param split the match result representing the split to be tested
 * @param vetoRegions regions where matches are not allowed. For efficiency
 * reasons, this method assumes these regions to be non overlapping and sorted
 * in ascending order.
 * All veto regions that end before the proposed match are also discarded
 * (again for efficiency reasons). This requires the proposed matches to be
 * sent to this method in ascending order, so as to avoid malfunctions.
 * @return <tt>true</tt> iff the proposed split should be ignored
 */
private boolean veto(MatchResult split, List<int[]> vetoRegions) {
    //if no more non splits available, accept everything
    for (Iterator<int[]> vetoRegIter = vetoRegions.iterator(); vetoRegIter.hasNext();) {
        int[] aVetoRegion = vetoRegIter.next();
        if (aVetoRegion[1] - 1 < split.start()) {
            //current veto region ends before the proposed split starts
            //--> discard the veto region
            vetoRegIter.remove();
        } else if (split.end() - 1 < aVetoRegion[0]) {
            //veto region starts after the split ends
            //-> we can return false
            return false;
        } else {
            //we have overlap
            return true;
        }
    }
    //if we got this far, all veto regions are before the split
    return false;
}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

protected void parseInfoBox(String strText, Metadata metadata, ContentHandler handler) throws SAXException {

    // attribute-value pairs separated by '|'. Values may also contain line breaks (as '<br />'), which count as enumerations
    // |Single1 |Datum1 , Besetzung1a Besetzung1b, Sonstiges1Titel |Sonstiges1Inhalt , Coverversion3 |Jahr3
    // | 1Option = 3
    // | 1Option Name = Demos
    // | 1Option Link = Demos
    // | 1Option Color =

    // first, cut out the infobox. (?m) is multiline and (?s) is dotall ('.' also matches line breaks)
    int iStartInfoBox = -1;
    int iEndInfoBox = -1;
    MatchResult infoMatch = StringUtils.findFirst("\\{\\{\\s*Infobox", strText);
    if (infoMatch != null) {
        iStartInfoBox = infoMatch.start();
        iEndInfoBox = StringUtils.findMatchingBracket(iStartInfoBox, strText) + 1;
    } else
        return;

    if (strText.length() < 3 || strText.length() < iEndInfoBox || iEndInfoBox <= 0
            || (iStartInfoBox + 2) > iEndInfoBox)
        return;

    String strInfoBox = "";

    strInfoBox = strText.substring(iStartInfoBox + 2, iEndInfoBox);
    if (strInfoBox.length() < 5)
        return;

    String strCleanedInfoBox = m_wikiModel.render(new PlainTextConverter(),
            strInfoBox.replaceAll("<br />", "&lt;br /&gt;"));

    // since these are effectively relational records, we turn them into separate documents as well

    // System.out.println(strCleanedInfoBox);
    // System.out.println(strCleanedInfoBox.substring(0, strCleanedInfoBox.indexOf("\n")).trim());

    // the first line names the infobox
    int iIndex = strCleanedInfoBox.indexOf("|");
    if (iIndex == -1)
        iIndex = strCleanedInfoBox.indexOf("\n");
    if (iIndex == -1)
        return;
    String strInfoBoxName = strCleanedInfoBox.substring(7, iIndex).trim();
    metadata.add(infobox, strInfoBoxName);

    String[] straCleanedInfoBoxSplit = strCleanedInfoBox.split("\\s*\\|\\s*");

    HashMap<String, MultiValueHashMap<String, String>> hsSubDocId2AttValuePairsOfSubDoc = new HashMap<String, MultiValueHashMap<String, String>>();

    for (String strAttValuePair : straCleanedInfoBoxSplit) {

        // System.out.println("\nattValPair unsplittet " + strAttValuePair);
        // the entries are separated by an '='
        String[] straAtt2Value = strAttValuePair.split("=");

        if (straAtt2Value.length == 0 || straAtt2Value[0] == null)
            continue;
        if (straAtt2Value.length < 2 || straAtt2Value[1] == null)
            continue;

        String strAttName = straAtt2Value[0].trim();
        String strAttValues = straAtt2Value[1];
        if (StringUtils.nullOrWhitespace(strAttValues))
            continue;
        // values may also contain line breaks ('<br />' or '&lt;br /&gt;'), which count as enumerations
        String[] straAttValues = strAttValues.split(Pattern.quote("&lt;br /&gt;"));
        // XXX for now we simply discard additional information in parentheses - it could also be stored as attnameAddInfo in an extra attribute -
        // but then we would again have to check whether a sub-document has to be created (e.g. several genre entries, each with its own
        // year)

        // the attribute name now decides whether a separate document should be created or not. If it contains a number, we remove
        // the number and group all attribute-value pairs carrying it into an extra record (MultiValueHashMap)
        Matcher numberMatcher = Pattern.compile("([\\D]*)(\\d+)([\\D]*)").matcher(strAttName);

        if (!numberMatcher.find()) {
            // there is no number in the attribute name - we simply add this value to the metadata.
            for (String strAttValue : straAttValues) {
                String strCleanedAttValue = cleanAttValue(strAttName, strAttValue);
                if (strCleanedAttValue != null)
                    metadata.add(strAttName, strCleanedAttValue);
            }
        } else {
            // the name contains a number - we store the value in a sub-document under the id <number>
            String strPrefix = numberMatcher.group(1);
            String strNumber = numberMatcher.group(2);
            String strSuffix = numberMatcher.group(3);

            String strDataSetId = strPrefix + strNumber;
            String strFinalAttName = strPrefix + strSuffix;

            // if there are even more numbers, we give up and simply add the value directly
            if (numberMatcher.find()) {
                for (String strAttValue : straAttValues) {
                    String strCleanedAttValue = cleanAttValue(strFinalAttName, strAttValue);
                    if (strCleanedAttValue != null)
                        metadata.add(strFinalAttName, strCleanedAttValue);
                }
            }

            // System.out.println("prefix " + strPrefix);
            // System.out.println("num " + strDataSetId);
            // System.out.println("suffix " + strSuffix);
            MultiValueHashMap<String, String> hsAttname2ValueOfSubDoc = hsSubDocId2AttValuePairsOfSubDoc
                    .get(strDataSetId);
            if (hsAttname2ValueOfSubDoc == null) {
                hsAttname2ValueOfSubDoc = new MultiValueHashMap<String, String>();
                hsSubDocId2AttValuePairsOfSubDoc.put(strDataSetId, hsAttname2ValueOfSubDoc);
            }

            for (String strAttValue : straAttValues)
                hsAttname2ValueOfSubDoc.add(strFinalAttName, strAttValue.replaceAll("\\(.*?\\)", "").trim());

        }
    }

    String strPageId = new UID().toString();
    metadata.add(LeechMetadata.id, strPageId);

    // we have to use the same metadata Object
    Metadata metadataBackup4ParentPage = TikaUtils.copyMetadata(metadata);

    for (MultiValueHashMap<String, String> hsAttValuePairsOfSubDoc : hsSubDocId2AttValuePairsOfSubDoc
            .values()) {

        TikaUtils.clearMetadata(metadata);

        // the reference to my parent
        metadata.add(LeechMetadata.parentId, strPageId);
        metadata.add(infobox, strInfoBoxName);
        String strChildId = new UID().toString();
        metadata.add(LeechMetadata.id, strChildId);
        // for back-referencing we also give the parent our id
        metadataBackup4ParentPage.add(LeechMetadata.childId, strChildId);

        for (Entry<String, String> attName2Value4SubDoc : hsAttValuePairsOfSubDoc.entryList()) {
            String strAttName = attName2Value4SubDoc.getKey();
            String strAttValue = attName2Value4SubDoc.getValue();

            String strCleanedAttValue = cleanAttValue(strAttName, strAttValue);
            if (strCleanedAttValue != null)
                metadata.add(strAttName, strCleanedAttValue);
        }

        metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia-meta+xml");

        // this way the enclosing ContentHandler receives several docs :)
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();

    }

    TikaUtils.clearMetadata(metadata);
    TikaUtils.copyMetadataFromTo(metadataBackup4ParentPage, metadata);

}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

@Override
public void execute() throws ExecutionException {
    interrupted = false;
    int lastProgress = 0;
    fireProgressChanged(lastProgress);
    //get pointers to the annotation sets
    AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0)
            ? document.getAnnotations()
            : document.getAnnotations(outputASName);

    String docText = document.getContent().toString();

    /* If the document's content is empty or contains only whitespace,
     * we drop out right here, since there's nothing to sentence-split.     */
    if (docText.trim().length() < 1) {
        return;
    }

    Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText);
    Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText);

    Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText);
    //store all non split locations in a list of pairs
    List<int[]> nonSplits = new LinkedList<int[]>();
    while (nonSplitMatcher.find()) {
        nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() });
    }
    //this list holds the next matches at each step
    List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>();
    //initialise matching process
    MatchResult internalMatchResult = null;
    if (internalSplitMatcher.find()) {
        internalMatchResult = internalSplitMatcher.toMatchResult();
        nextSplitMatches.add(internalMatchResult);
    }
    MatchResult externalMatchResult = null;
    if (externalSplitMatcher.find()) {
        externalMatchResult = externalSplitMatcher.toMatchResult();
        nextSplitMatches.add(externalMatchResult);
    }
    MatchResultComparator comparator = new MatchResultComparator();
    int lastSentenceEnd = 0;

    while (!nextSplitMatches.isEmpty()) {
        //see which one matches first
        Collections.sort(nextSplitMatches, comparator);
        MatchResult nextMatch = nextSplitMatches.remove(0);
        if (nextMatch == internalMatchResult) {
            //we have a new internal split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //add the split annotation
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "internal");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    //generate the sentence annotation
                    int endOffset = nextMatch.end();
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    while (lastSentenceEnd < endOffset
                            && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < nextMatch.start()) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = endOffset;
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step
            if (internalSplitMatcher.find()) {
                internalMatchResult = internalSplitMatcher.toMatchResult();
                nextSplitMatches.add(internalMatchResult);
            } else {
                internalMatchResult = null;
            }
        } else if (nextMatch == externalMatchResult) {
            //we have a new external split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //generate the split
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "external");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    //generate the sentence annotation
                    //find the last non whitespace character, going backward from
                    //where the external skip starts
                    int endOffset = nextMatch.start();
                    while (endOffset > lastSentenceEnd
                            && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) {
                        endOffset--;
                    }
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    while (lastSentenceEnd < endOffset
                            && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < endOffset) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = nextMatch.end();
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step
            if (externalSplitMatcher.find()) {
                externalMatchResult = externalSplitMatcher.toMatchResult();
                nextSplitMatches.add(externalMatchResult);
            } else {
                externalMatchResult = null;
            }
        } else {
            //malfunction
            throw new ExecutionException("Invalid state - cannot identify match!");
        }
        //report progress
        int newProgress = 100 * lastSentenceEnd / docText.length();
        if (newProgress - lastProgress > 20) {
            lastProgress = newProgress;
            fireProgressChanged(lastProgress);
        }
    } //while(!nextSplitMatches.isEmpty())
    fireProcessFinished();
}