Example usage for java.util.regex Matcher start

List of usage examples for java.util.regex Matcher start

Introduction

On this page you can find example usages of the java.util.regex.Matcher start method.

Prototype

public int start(String name) 

Source Link

Document

Returns the start index of the subsequence captured by the given named-capturing group during the previous match operation.

Usage

From source file:com.joliciel.talismane.filters.SentenceHolderImpl.java

/**
 * Cuts this holder's text at the recorded sentence boundaries and returns one
 * {@link Sentence} per boundary, in order. If text remains after the last boundary
 * (or there are no boundaries but the text is non-empty), a final sentence covering
 * that remainder is added and marked as not complete.
 * <p>
 * Each sentence's text has opening white space removed, closing white space removed
 * (except for the trailing incomplete sentence) and runs of duplicate white space
 * reduced to their first character, as defined by the three white space patterns of
 * this class.
 *
 * @param leftover an incomplete sentence whose text is prepended to the first
 *                 sentence produced here; that object itself is then reused as the
 *                 first returned sentence. May be {@code null}.
 * @return the detected sentences, possibly empty
 */
@Override
public List<Sentence> getDetectedSentences(Sentence leftover) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("getDetectedSentences. leftover=" + leftover);
    }

    List<Sentence> sentences = new ArrayList<Sentence>();

    // start (inclusive) of the next sentence within this holder's text
    int currentIndex = 0;
    // without any boundary, any non-empty text is left over
    boolean haveLeftOvers = this.getText().length() > 0;
    if (this.sentenceBoundaries.size() > 0) {
        haveLeftOvers = false;
        // first element in descending order, i.e. the last boundary
        int lastSentenceBoundary = this.sentenceBoundaries.descendingIterator().next();
        // text continues after the last boundary => there is an unfinished sentence
        if (lastSentenceBoundary < this.getText().length() - 1) {
            haveLeftOvers = true;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("haveLeftOvers? " + lastSentenceBoundary + " < " + (this.getText().length() - 1) + " = "
                    + haveLeftOvers);
        }
    }

    // treat the last character of the text as a pseudo-boundary closing the leftover part
    List<Integer> allBoundaries = new ArrayList<Integer>(this.sentenceBoundaries);
    if (haveLeftOvers)
        allBoundaries.add(this.getText().length() - 1);

    for (int sentenceBoundary : allBoundaries) {
        boolean isLeftover = haveLeftOvers && sentenceBoundary == this.getText().length() - 1;

        // NOTE(review): getSentence() is presumably a factory for a new, empty sentence - confirm
        Sentence sentence = filterService.getSentence();
        int leftOverTextLength = 0;
        String text = "";
        if (leftover != null) {
            // continue the sentence handed in by the caller; only the first iteration
            // sees it because it is nulled out right after
            sentence = leftover;
            leftOverTextLength = leftover.getText().length();
            text = leftover.getText() + this.getText().substring(currentIndex, sentenceBoundary + 1);
            leftover = null;
        } else {
            text = this.getText().substring(currentIndex, sentenceBoundary + 1);
        }

        // handle trim & duplicate white space here
        // characters before openingWhiteSpaceEnd are dropped
        Matcher matcherOpeningWhiteSpace = openingWhiteSpacePattern.matcher(text);
        int openingWhiteSpaceEnd = 0;
        if (matcherOpeningWhiteSpace.find()) {
            openingWhiteSpaceEnd = matcherOpeningWhiteSpace.end(1);
        }

        // characters from closingWhiteSpaceStart onwards are dropped; an incomplete
        // sentence keeps its closing white space
        int closingWhiteSpaceStart = text.length();
        if (!isLeftover) {
            Matcher matcherClosingWhiteSpace = closingWhiteSpacePattern.matcher(text);
            if (matcherClosingWhiteSpace.find()) {
                closingWhiteSpaceStart = matcherClosingWhiteSpace.start(1);
            }
        }

        // positions (within text) of white space characters to drop
        Matcher matcherDuplicateWhiteSpace = duplicateWhiteSpacePattern.matcher(text);
        Set<Integer> duplicateWhiteSpace = new HashSet<Integer>();
        while (matcherDuplicateWhiteSpace.find()) {
            // remove all white space barring the first
            for (int i = matcherDuplicateWhiteSpace.start() + 1; i < matcherDuplicateWhiteSpace.end(); i++) {
                duplicateWhiteSpace.add(i);
            }
        }

        StringBuilder sb = new StringBuilder();
        // i: position in this holder's text; j: position in text (leftover text + holder text)
        int i = currentIndex;
        for (int j = 0; j < text.length(); j++) {
            boolean appendLetter = false;
            if (j < openingWhiteSpaceEnd) {
                // do nothing
            } else if (j >= closingWhiteSpaceStart) {
                // do nothing
            } else if (duplicateWhiteSpace.contains(j)) {
                // do nothing
            } else {
                appendLetter = true;
            }

            if (j >= leftOverTextLength) {
                // if we're past the leftovers and onto the new stuff
                if (appendLetter)
                    sentence.addOriginalIndex(this.getOriginalIndexes().get(i));

                // segments are re-keyed by the position they take in the cleaned-up text
                if (this.getOriginalTextSegments().containsKey(i))
                    sentence.getOriginalTextSegments().put(sb.length(), this.getOriginalTextSegments().get(i));

                i++;
            }

            if (appendLetter)
                sb.append(text.charAt(j));
        }

        sentence.setText(sb.toString());
        if (LOG.isTraceEnabled()) {
            LOG.trace("sentence.setText |" + sentence.getText() + "|");
        }

        sentence.setComplete(!isLeftover);

        // every sentence receives all newline locations known to this holder
        for (Entry<Integer, Integer> newlineLocation : this.newlines.entrySet()) {
            sentence.addNewline(newlineLocation.getKey(), newlineLocation.getValue());
        }

        sentence.setFileName(this.getFileName());

        sentences.add(sentence);
        currentIndex = sentenceBoundary + 1;
    }

    return sentences;
}

From source file:com.dreamlinx.automation.DINRelay.java

/**
 * Creates an HttpClient and logs in to the DIN relay.
 * <p>
 * The relay's start page is fetched, the value of its {@code Challenge} form field is
 * extracted, and the user name is posted to {@code /login.tgi} together with the MD5
 * hash of {@code challenge + username + password + challenge}. If the page contains
 * no challenge field, an empty challenge is used. If the field occurs several times,
 * the last occurrence wins.
 *
 * @throws MalformedURLException declared for callers; not thrown directly by this method
 * @throws HttpException if either request is answered with a status other than 200
 * @throws IOException if communication with the relay fails
 */
private void setupHttpClient() throws MalformedURLException, HttpException, IOException {
    httpClient = new HttpClient();
    httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);

    String response;
    GetMethod getMethod = new GetMethod("http://" + ipAddress);
    try {
        int getStatus = httpClient.executeMethod(getMethod);
        if (getStatus != 200) {
            throw new HttpException(getStatus + " - " + getMethod.getStatusText());
        }
        response = getMethod.getResponseBodyAsString();
    } finally {
        // release the connection on the error path too
        getMethod.releaseConnection();
    }
    if (response == null) {
        // no body: treat as a page without challenge field
        response = "";
    }

    // [^"]* instead of a greedy .* so the value ends at its own closing quote even when
    // further quoted attributes follow on the same line
    Pattern pattern = Pattern.compile("name=\"Challenge\" value=\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(response);
    String challenge = "";
    while (matcher.find()) {
        challenge = response.substring(matcher.start(1), matcher.end(1));
    }

    String md5Password = challenge + username + password + challenge;
    md5Password = toMD5(md5Password);

    PostMethod postMethod = new PostMethod("http://" + ipAddress + "/login.tgi");
    postMethod.addParameter("Username", username);
    postMethod.addParameter("Password", md5Password);

    try {
        int postStatus = httpClient.executeMethod(postMethod);
        if (postStatus != 200) {
            throw new HttpException(postStatus + " - " + postMethod.getStatusText());
        }
    } finally {
        postMethod.releaseConnection();
    }
}

From source file:net.healeys.lexic.online.OnlineGame.java

/**
 * Downloads the game description from {@code uri} and starts the game.
 * <p>
 * The response is read line by line; every line of the form {@code key:value} is
 * evaluated: {@code board} sets a 4x4 or 5x5 board depending on the number of
 * comma-separated letters, {@code id} sets the game id, and any other key is stored
 * in {@code urls}. Up to {@code MAX_ATTEMPTS} attempts are made; any exception ends
 * the current attempt.
 *
 * @return {@code true} if one attempt completed without an exception, {@code false}
 *         if all attempts failed ({@code super.start()} is still invoked in that case)
 */
public boolean start() {
    Pattern pat = Pattern.compile("(\\w+):(.+)");
    for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
        try {
            HttpClient httpClient = new DefaultHttpClient();
            HttpGet get = new HttpGet(uri);
            addHeaders(get);

            HttpResponse resp = httpClient.execute(get);

            BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent()));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    Matcher mat = pat.matcher(line);
                    if (mat.find()) {
                        String key = line.substring(mat.start(1), mat.end(1));
                        String value = line.substring(mat.start(2), mat.end(2));

                        if (key.equals("board")) {
                            String[] letters = value.split(",");
                            if (letters.length == 16) {
                                setBoard(new FourByFourBoard(letters));
                            } else if (letters.length == 25) {
                                setBoard(new FiveByFiveBoard(letters));
                            }
                        } else if (key.equals("id")) {
                            id = Integer.parseInt(value);
                        } else {
                            urls.put(key, value);
                        }
                    }
                }
            } finally {
                // always close the response stream, also when parsing fails half-way
                try {
                    br.close();
                } catch (java.io.IOException ignored) {
                    // a failure to close must not turn a fully parsed response into a retry
                }
            }

            super.start();
            return true;
        } catch (Exception e) {
            // deliberate best effort: connection or parse error, try again
        }
    }

    super.start();
    return false;
}

From source file:com.nextep.designer.sqlgen.oracle.parser.OraclePackageParser.java

/**
 * Replaces the identifier that follows the closing {@code END} keyword of a SQL
 * source with a new name.
 * <p>
 * The keyword must be followed by white space and an identifier, and only
 * {@code ;}, {@code /} or white space may appear between that identifier and the end
 * of the text. The keyword is matched case-insensitively.
 *
 * @param sql     the SQL source to adjust
 * @param newName the name to put after the closing {@code END}
 * @return the adjusted SQL, or {@code sql} unchanged if no closing tag was found
 */
private String renameSqlEnd(String sql, String newName) {
    // Match case-insensitively on the original text. Matching on sql.toLowerCase() and
    // applying the offsets to sql is wrong whenever lower-casing changes the length of
    // the string (e.g. U+0130 becomes two characters).
    final Pattern pattern = Pattern.compile("end\\s+((\\w)+)(;|/|\\s)*$", Pattern.CASE_INSENSITIVE); //$NON-NLS-1$
    final Matcher m = pattern.matcher(sql);
    if (m.find()) {
        return sql.substring(0, m.start(1)) + newName + sql.substring(m.end(1));
    }
    return sql;
}

From source file:com.zextras.zimbradrive.soap.SearchRequestHdlr.java

/**
 * Wraps the value of every {@code name:value} token of the query in double quotes.
 * <p>
 * Only values without a double quote that are terminated by a space or by the end of
 * the query are matched; everything else is copied through unchanged.
 *
 * @param query the query to normalize
 * @return the query with the matched token values quoted
 */
private String getStandardQuery(String query) {
    // tokenName ':' unquotedValue (' ' | end of query)
    final Matcher tokenMatcher = Pattern.compile("([^ :]+:)([^\"]*?)( |$)").matcher(query);
    final StringBuilder quoted = new StringBuilder();

    // everything before this index has already been copied to the result
    int copiedUpTo = 0;
    while (tokenMatcher.find()) {
        // text since the previous value, up to and including "tokenName:"
        quoted.append(query, copiedUpTo, tokenMatcher.end(1));

        // the value itself, surrounded by quotes
        quoted.append('"');
        quoted.append(query, tokenMatcher.start(2), tokenMatcher.end(2));
        quoted.append('"');

        copiedUpTo = tokenMatcher.end(2);
    }

    quoted.append(query, copiedUpTo, query.length());
    return quoted.toString();
}

From source file:com.google.testing.pogen.parser.template.RegexVariableExtractor.java

/**
 * Records tag information for an opening element and collects the template variables
 * found in its attribute values before delegating to the superclass.
 * <p>
 * Elements with a prefix are only passed on to the superclass.
 */
@Override
public void startElement(QName element, XMLAttributes attrs, Augmentations augs) {
    processCharacters();

    // Elements with a prefix (":") are not html elements, e.g. "c:set" in JSP: pass them on only.
    if (element.prefix != null) {
        super.startElement(element, attrs, augs);
        return;
    }

    // Offsets of the tag come from the augmentations
    HTMLEventInfo eventInfo = (HTMLEventInfo) augs.getItem(AUGMENTATIONS);
    HtmlTagInfo tagInfo = new HtmlTagInfo(attrs.getValue(attributeName), eventInfo.getBeginCharacterOffset(),
            eventInfo.getEndCharacterOffset(), repeatedRanges);
    tagInfoStack.push(tagInfo);

    final int attributeCount = attrs.getLength();
    for (int index = 0; index < attributeCount; index++) {
        final String attributeValue = attrs.getValue(index);
        Matcher variableMatcher = variablePattern.matcher(attributeValue);
        final String attributeQName = attrs.getQName(index);

        while (variableMatcher.find()) {
            int groupIndex = getFirstAvailableGroupIndex(variableMatcher);
            int variableStart = variableMatcher.start(groupIndex);
            // variables starting inside an excluded range are skipped
            if (excludedRanges.contains(variableStart)) {
                continue;
            }
            tagInfo.addVariableInfo(variableMatcher.group(0), variableMatcher.group(groupIndex), variableStart,
                    attributeQName);
        }

        if (attributeQName.equals("id")) {
            tagInfo.setIdValue(attributeValue);
        } else if (attributeQName.equals("name")) {
            tagInfo.setNameValue(attributeValue);
        }
    }

    super.startElement(element, attrs, augs);
}

From source file:net.healeys.lexic.online.OnlineGame.java

/**
 * Posts the player's words to the server and shows the server's answer.
 * <p>
 * The words returned by {@code uniqueListIterator()} are joined with commas,
 * URL-encoded as UTF-8 and sent as the {@code words} form field to the URL registered
 * under {@code "words"}. The response body is loaded into the given view as
 * {@code text/html}. Up to {@code MAX_ATTEMPTS} attempts are made; any exception ends
 * the current attempt.
 *
 * @param display the view that renders the server's response
 * @return {@code true} if the response was handed to {@code display}, {@code false}
 *         if all attempts failed
 */
public boolean submitWords(WebView display) {
    Pattern contentPat = Pattern.compile("([^;]+); charset=(.+)");
    for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
        String url = BASE_URL + urls.get("words");

        Iterator<String> li = uniqueListIterator();
        StringBuilder words = new StringBuilder(4096);
        while (li.hasNext()) {
            words.append(li.next());
            if (li.hasNext()) {
                words.append(',');
            }
        }

        try {
            // explicit charset: the one-argument overload is deprecated and uses the
            // platform default encoding
            String data = URLEncoder.encode(words.toString(), "UTF-8");

            HttpClient httpClient = new DefaultHttpClient();
            HttpPost post = new HttpPost(url);
            addHeaders(post);

            post.setEntity(new StringEntity("words=" + data));

            HttpResponse resp = httpClient.execute(post);

            StringBuilder body = new StringBuilder(4096);
            BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent()));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    body.append(line);
                    body.append('\n');
                }
            } finally {
                // always close the response stream
                try {
                    br.close();
                } catch (java.io.IOException ignored) {
                    // a failure to close must not cause the words to be submitted again
                }
            }

            // Content type and charset announced by the server. They are informational
            // only (the page is always rendered as utf-8 text/html below), so a missing
            // header must not fail the attempt and re-submit the words.
            String contentType = null;
            String contentEncoding = "utf-8";
            if (resp.getFirstHeader("Content-type") != null) {
                String contentHeader = resp.getFirstHeader("Content-type").getValue();
                contentType = contentHeader;
                Matcher mat = contentPat.matcher(contentHeader);
                if (mat.find()) {
                    contentType = contentHeader.substring(mat.start(1), mat.end(1));
                    contentEncoding = contentHeader.substring(mat.start(2), mat.end(2));
                }
            }

            display.loadDataWithBaseURL(url, body.toString(), "text/html", "utf-8", null);
            return true;
        } catch (Exception e) {
            // deliberate best effort: connection error, try again
        }
    }
    return false;
}

From source file:dk.netarkivet.harvester.harvesting.extractor.IcelandicExtractorJS.java

/**
 * Speculatively extracts links from the string literals found in a piece of JavaScript.
 * <p>
 * Every string matched by {@code JAVASCRIPT_STRING_EXTRACTOR} that looks like a URI
 * (according to {@code STRING_URI_DETECTOR}) is unescaped, fixed up and, unless it is
 * judged a false positive, added as an outlink of {@code curi}. Strings that are not
 * URIs are searched recursively for nested strings.
 *
 * @param ext            the extractor supplying parameters and error logging
 * @param curi           the URI being processed; receives the found links
 * @param cs             the text to search
 * @param handlingJSFile {@code true} to add links relative to the via URI,
 *                       {@code false} to add them relative to the base URI
 * @return the number of links found, including those found in nested strings
 */
public long considerStrings(Extractor ext, CrawlURI curi, CharSequence cs, boolean handlingJSFile) {
    long foundLinks = 0;
    Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs);
    int startIndex = 0;
    while (strings.find(startIndex)) {
        CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2));
        Matcher uri = TextUtils.getMatcher(STRING_URI_DETECTOR, subsequence);
        if (uri.matches()) {
            String string = uri.group();
            boolean falsePositive = false;
            try {
                string = StringEscapeUtils.unescapeJavaScript(string);
            } catch (NestableRuntimeException e) {
                LOGGER.log(Level.WARNING, "problem unescaping some javascript", e);
            }
            string = UriUtils.speculativeFixup(string, curi.getUURI());

            // Filter out some bad false positives (should really fix regexp for URI detection)
            if (string.contains("/.") || string.contains("@") || string.length() > 150) {
                // While legal in URIs, these are rare and usually an indication of a false positive
                // in the speculative extraction.
                falsePositive = true;
            }

            if (!falsePositive) {
                falsePositive = shouldIgnorePossibleRelativeLink(string);
            }

            if (falsePositive) {
                foundFalsePositives++;
            } else {
                foundLinks++;
                try {
                    int max = ext.getExtractorParameters().getMaxOutlinks();
                    if (handlingJSFile) {
                        addRelativeToVia(curi, max, string, JS_MISC, SPECULATIVE);
                    } else {
                        addRelativeToBase(curi, max, string, JS_MISC, SPECULATIVE);
                    }
                } catch (URIException e) {
                    ext.logUriError(e, curi.getUURI(), string);
                }
            }
        } else {
            foundLinks += considerStrings(ext, curi, subsequence, handlingJSFile);
        }
        // hand the per-string matcher back, just like the outer matcher below;
        // previously it was obtained from TextUtils but never recycled
        TextUtils.recycleMatcher(uri);

        // reconsider the last closing quote as possible opening quote
        startIndex = strings.end(2);
    }
    TextUtils.recycleMatcher(strings);
    return foundLinks;
}

From source file:com.github.rwitzel.streamflyer.xml.XmlVersionModifier.java

/**
 * Sets the XML version in the prolog at the start of the stream, or inserts a prolog
 * if the stream has none.
 * <p>
 * The first call only requests {@code INITIAL_NUMBER_OF_CHARACTERS} characters. The
 * second call rewrites or inserts the prolog. All later calls skip the buffer.
 *
 * @throws XmlPrologRidiculouslyLongException if the buffer starts with {@code <?xml}
 *         but no version 1.0 or 1.1 attribute is found in it
 * @throws IllegalStateException if the modifier is in an unknown state
 * @see com.github.rwitzel.streamflyer.core.Modifier#modify(java.lang.StringBuilder, int, boolean)
 */
@Override
public AfterModification modify(StringBuilder characterBuffer, int firstModifiableCharacterInBuffer,
        boolean endOfStreamHit) {

    switch (state) {

    case NO_LONGER_MODIFYING:

        return factory.skipEntireBuffer(characterBuffer, firstModifiableCharacterInBuffer, endOfStreamHit);

    case INITIAL:

        state = XmlVersionModifierState.PROLOG_REQUEST;

        // you never know how many whitespace characters are in the prolog
        return factory.modifyAgainImmediately(INITIAL_NUMBER_OF_CHARACTERS, firstModifiableCharacterInBuffer);

    case PROLOG_REQUEST:

        // (Should we be aware of BOMs here? No. I consider it the
        // responsibility of the caller to provide characters without BOM.)

        // lookingAt() anchors the match at the start of the buffer without requiring
        // the rest of the buffer to match. The former matches() with a trailing ".*"
        // failed as soon as the buffer contained a line terminator, which led to a
        // second prolog being inserted. The dots of the version are escaped so that
        // only "1.0" and "1.1" are accepted.
        Matcher matcher = Pattern.compile("<\\?xml[^>]*version\\s*=\\s*['\"]((1\\.0)|(1\\.1))['\"]")
                .matcher(characterBuffer);
        if (matcher.lookingAt()) {

            // replace version in prolog
            characterBuffer.replace(matcher.start(1), matcher.end(1), xmlVersion);
        } else {
            // is there a prolog that is too long?
            Matcher matcher2 = Pattern.compile("<\\?xml").matcher(characterBuffer);
            if (matcher2.lookingAt()) {
                // this is not normal at all -> throw exception
                throw new XmlPrologRidiculouslyLongException(characterBuffer.toString());
            }

            // insert prolog; an XML declaration must be closed with "?>"
            characterBuffer.insert(0, "<?xml version='" + xmlVersion + "'?>");
        }

        state = XmlVersionModifierState.NO_LONGER_MODIFYING;

        return factory.skipEntireBuffer(characterBuffer, firstModifiableCharacterInBuffer, endOfStreamHit);

    default:
        throw new IllegalStateException("state " + state + " not supported");

    }

}

From source file:gov.nyc.doitt.gis.geoclient.parser.test.ChunkSpecParser.java

/**
 * Extracts the values matched by group 1 of {@code TOKEN_VALUE_PATTERN} and returns
 * them as tokens whose positions are corrected for the delimiters around them.
 *
 * @param delimitedString the text containing the delimited values
 * @return one token per match, in order of appearance
 */
protected List<MutableToken> parseBracketValues(String delimitedString) {
    final List<MutableToken> tokens = new ArrayList<>();
    final Matcher valueMatcher = TOKEN_VALUE_PATTERN.matcher(delimitedString);

    // grows by 2 for every value already handled (its pair of brackets)
    int skippedBrackets = 0;
    for (; valueMatcher.find(); skippedBrackets += 2) {
        final String tokenText = valueMatcher.group(1);
        // one more position is taken off for the opening "[" of the current value
        final int tokenStart = valueMatcher.start(1) - skippedBrackets - 1;
        final int tokenEnd = tokenStart + tokenText.length();
        tokens.add(new MutableToken(tokenText, tokenStart, tokenEnd));
    }
    return tokens;
}