Example usage for org.apache.lucene.analysis Token setFlags

List of usage examples for org.apache.lucene.analysis Token setFlags

Introduction

On this page you can find example usages of org.apache.lucene.analysis Token.setFlags.

Prototype

@Override
public void setFlags(int flags) 

Source Link

Usage

From source file: org.apache.solr.analysis.BufferedTokenStream.java

License: Apache License

/**
 * Emulates the pre-4.0 token-at-a-time API for backwards compatibility:
 * advances the wrapped stream one position and snapshots its attribute
 * state into a standalone {@link Token}.
 *
 * @return the next token, or {@code null} once the stream is exhausted
 * @throws IOException if the underlying stream fails
 */
private Token readToken() throws IOException {
    if (!input.incrementToken()) {
        return null; // end of stream
    }
    // Copy every tracked attribute into a fresh Token instance.
    Token tok = new Token();
    tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    tok.setType(typeAtt.type());
    tok.setFlags(flagsAtt.getFlags());
    tok.setPositionIncrement(posIncAtt.getPositionIncrement());
    tok.setPayload(payloadAtt.getPayload());
    return tok;
}

From source file: org.apache.solr.handler.component.SpellCheckComponent.java

License: Apache License

/**
 * Runs {@code q} through the given analyzer and materializes every emitted
 * term as a standalone {@link Token}, copying term text, offsets, type,
 * flags, position increment and payload from the stream's attributes.
 *
 * @param q        the query text to analyze
 * @param analyzer the analyzer to tokenize with (must not be null)
 * @return the tokens produced, in stream order
 * @throws IOException if the token stream fails
 */
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> tokens = new ArrayList<Token>();
    assert analyzer != null;
    TokenStream ts = analyzer.tokenStream("", q);
    try {
        ts.reset();
        // TODO: support custom attributes
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flags = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payload = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);

        while (ts.incrementToken()) {
            // Snapshot the stream's current attribute state.
            Token tok = new Token();
            tok.copyBuffer(term.buffer(), 0, term.length());
            tok.setOffset(offset.startOffset(), offset.endOffset());
            tok.setType(type.type());
            tok.setFlags(flags.getFlags());
            tok.setPayload(payload.getPayload());
            tok.setPositionIncrement(posInc.getPositionIncrement());
            tokens.add(tok);
        }
        ts.end();
        return tokens;
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}

From source file: org.apache.solr.spelling.SimpleQueryConverter.java

License: Apache License

/**
 * Tokenizes {@code origQuery} on whitespace and returns each term as a
 * {@link Token} carrying text, offsets, flags, payload, position increment
 * and type.
 *
 * @param origQuery the raw query string to split
 * @return the (unordered) set of tokens produced by whitespace analysis
 * @throws RuntimeException wrapping any {@link IOException} from the stream
 */
@Override
public Collection<Token> convert(String origQuery) {
    Collection<Token> result = new HashSet<Token>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("", origQuery);
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

        ts.reset();

        while (ts.incrementToken()) {
            Token tok = new Token();
            tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            tok.setFlags(flagsAtt.getFlags());
            tok.setPayload(payloadAtt.getPayload());
            tok.setPositionIncrement(posIncAtt.getPositionIncrement());
            tok.setType(typeAtt.type());
            result.add(tok);
        }
        ts.end();
        return result;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Close the stream first, then the analyzer itself. Analyzer is
        // Closeable; the original code leaked it by only closing the stream.
        IOUtils.closeWhileHandlingException(ts, analyzer);
    }
}

From source file: org.apache.solr.spelling.SpellingQueryConverter.java

License: Apache License

/**
 * Converts the original query string to a collection of Lucene Tokens.
 * <p>
 * Scans the query with {@code QUERY_REGEX} while maintaining a one-word
 * lookahead ({@code nextWord}), so each emitted term can be flagged based
 * on the boolean operator (AND/OR/NOT) adjacent to it. Operator words are
 * consumed and remembered but never emitted as tokens themselves.
 *
 * @param original the original query string
 * @return a Collection of Lucene Tokens
 */
@Override
public Collection<Token> convert(String original) {
    if (original == null) { // this can happen with q.alt = and no query
        return Collections.emptyList();
    }
    Collection<Token> result = new ArrayList<Token>();
    Matcher matcher = QUERY_REGEX.matcher(original);
    String nextWord = null;       // one-word lookahead buffer
    int nextStartIndex = 0;       // start offset of the lookahead word
    String lastBooleanOp = null;  // most recent AND/OR/NOT encountered
    // Loop while either a buffered lookahead word remains or the matcher
    // can find another word in the input.
    while (nextWord != null || matcher.find()) {
        String word = null;
        int startIndex = 0;
        if (nextWord != null) {
            // Consume the buffered lookahead word first.
            word = nextWord;
            startIndex = nextStartIndex;
            nextWord = null;
        } else {
            word = matcher.group(0);
            startIndex = matcher.start();
        }
        // Refill the lookahead with the following word, if any.
        if (matcher.find()) {
            nextWord = matcher.group(0);
            nextStartIndex = matcher.start();
        }
        // Operator words are recorded but produce no token of their own.
        if ("AND".equals(word) || "OR".equals(word) || "NOT".equals(word)) {
            lastBooleanOp = word;
            continue;
        }
        // treat "AND NOT" as "NOT"...
        if ("AND".equals(nextWord) && original.length() > nextStartIndex + 7
                && original.substring(nextStartIndex, nextStartIndex + 7).equals("AND NOT")) {
            nextWord = "NOT";
        }

        int flagValue = 0;
        // A leading '-' (on the word itself, or the raw character just
        // before it in the query) marks a prohibited term; '+' a required one.
        if (word.charAt(0) == '-' || (startIndex > 0 && original.charAt(startIndex - 1) == '-')) {
            flagValue = PROHIBITED_TERM_FLAG;
        } else if (word.charAt(0) == '+' || (startIndex > 0 && original.charAt(startIndex - 1) == '+')) {
            flagValue = REQUIRED_TERM_FLAG;
            //we don't know the default operator so just assume the first operator isn't new.
        } else if (nextWord != null && lastBooleanOp != null && !nextWord.equals(lastBooleanOp)
                && ("AND".equals(nextWord) || "OR".equals(nextWord) || "NOT".equals(nextWord))) {
            flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
            //...unless the 1st boolean operator is a NOT, because only AND/OR can be default.
        } else if (nextWord != null && lastBooleanOp == null && !nextWord.equals(lastBooleanOp)
                && ("NOT".equals(nextWord))) {
            // NOTE(review): with lastBooleanOp == null the
            // !nextWord.equals(lastBooleanOp) test is always true here and
            // looks redundant — confirm before simplifying.
            flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
        }
        try {
            // Delegates actual tokenization; produced tokens carry flagValue.
            analyze(result, word, startIndex, flagValue);
        } catch (IOException e) {
            // TODO: shouldn't we log something?
        }
    }
    // If any boolean operator appeared, additionally mark every token as
    // being part of a boolean query.
    if (lastBooleanOp != null) {
        for (Token t : result) {
            int f = t.getFlags();
            t.setFlags(f |= QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG);
        }
    }
    return result;
}

From source file: org.apache.solr.spelling.SpellingQueryConverter.java

License: Apache License

/**
 * Analyzes {@code text} with the configured analyzer and appends each
 * produced term to {@code result} as a {@link Token}.
 * <p>
 * Offsets reported by the analyzer are shifted by {@code offset} so they
 * map back into the original query string, and every token's flags are
 * overwritten with {@code flagsAttValue}.
 *
 * @param result        sink collection for the produced tokens
 * @param text          the word to analyze
 * @param offset        start offset of {@code text} within the original query
 * @param flagsAttValue flag bits stamped onto every produced token
 * @throws IOException if the token stream fails
 */
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue)
        throws IOException {
    TokenStream stream = analyzer.tokenStream("", text);
    try {
        // TODO: support custom attributes
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
        PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
            token.setFlags(flagsAttValue); //overwriting any flags already set...
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
        }
        stream.end();
    } finally {
        // Previously close() was skipped whenever reset()/incrementToken()
        // threw, leaking the stream; always close it.
        stream.close();
    }
}

From source file: wiki.indexer.tokenizer.CustomWikipediaTokenizer.java

License: Apache License

// Collapses a run of consecutive scanner tokens of the same wiki type into
// one whitespace-joined term (written into reusableToken) while ALSO saving
// each constituent piece as its own Token into the `tokens` iterator, so
// both the collapsed and the individual tokens can be emitted.
// NOTE(review): raw List — presumably List<Token>; confirm the declared type
// of the `tokens` field before parameterizing.
@SuppressWarnings("unchecked")
private void collapseAndSaveTokens(final Token reusableToken, int tokenType, String type) throws IOException {
    // collapse
    StringBuffer buffer = new StringBuffer(32);
    int numAdded = scanner.setText(buffer);
    // TODO: how to know how much whitespace to add
    int theStart = scanner.yychar();    // start offset of the first piece
    int lastPos = theStart + numAdded;  // end offset of the text appended so far
    int tmpTokType;
    int numSeen = 0;
    List tmp = new ArrayList();
    Token saved = new Token();
    setupSavedToken(saved, 0, type);    // first piece gets position increment 0
    tmp.add(saved);
    // while we can get a token and that token is the same type and we have
    // not transitioned to a new wiki-item of the same type
    while ((tmpTokType = scanner.getNextToken()) != CustomWikipediaTokenizerImpl.YYEOF
            && tmpTokType == tokenType && scanner.getNumWikiTokensSeen() > numSeen) {
        int currPos = scanner.yychar();
        // append whitespace
        for (int i = 0; i < (currPos - lastPos); i++) {
            buffer.append(' ');
        }
        numAdded = scanner.setText(buffer);
        saved = new Token();
        setupSavedToken(saved, scanner.getPositionIncrement(), type);
        tmp.add(saved);
        numSeen++;
        lastPos = currPos + numAdded;
    }
    // trim the buffer
    String s = buffer.toString().trim();
    reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
    reusableToken.setStartOffset(theStart);
    reusableToken.setEndOffset(theStart + s.length());
    reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG);
    // The way the loop is written, we will have proceeded to the next
    // token. We need to pushback the scanner to lastPos
    if (tmpTokType != CustomWikipediaTokenizerImpl.YYEOF) {
        scanner.yypushback(scanner.yylength());
    }
    tokens = tmp.iterator();
}

From source file: wiki.indexer.tokenizer.CustomWikipediaTokenizer.java

License: Apache License

// Collapses a run of consecutive scanner tokens of the same wiki type into
// a single whitespace-joined term, written into reusableToken. Unlike
// collapseAndSaveTokens, the individual pieces are NOT saved for emission.
private void collapseTokens(final Token reusableToken, int tokenType) throws IOException {
    // collapse
    StringBuffer buffer = new StringBuffer(32);
    int numAdded = scanner.setText(buffer);
    // TODO: how to know how much whitespace to add
    int theStart = scanner.yychar();    // start offset of the first piece
    int lastPos = theStart + numAdded;  // end offset of the text appended so far
    int tmpTokType;
    int numSeen = 0;
    // while we can get a token and that token is the same type and we have
    // not transitioned to a new wiki-item of the same type
    while ((tmpTokType = scanner.getNextToken()) != CustomWikipediaTokenizerImpl.YYEOF
            && tmpTokType == tokenType && scanner.getNumWikiTokensSeen() > numSeen) {
        int currPos = scanner.yychar();
        // append whitespace
        for (int i = 0; i < (currPos - lastPos); i++) {
            buffer.append(' ');
        }
        numAdded = scanner.setText(buffer);
        numSeen++;
        lastPos = currPos + numAdded;
    }
    // trim the buffer
    String s = buffer.toString().trim();
    reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
    reusableToken.setStartOffset(theStart);
    reusableToken.setEndOffset(theStart + s.length());
    reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG);
    // The way the loop is written, we will have proceeded to the next
    // token. We need to pushback the scanner to lastPos
    if (tmpTokType != CustomWikipediaTokenizerImpl.YYEOF) {
        scanner.yypushback(scanner.yylength());
    } else {
        // At EOF there is nothing buffered to emit afterwards.
        tokens = null;
    }
}