Example usage for org.apache.lucene.analysis Token setOffset

List of usage examples for org.apache.lucene.analysis Token setOffset

Introduction

In this page you can find the example usage for org.apache.lucene.analysis Token setOffset.

Prototype

@Override
public void setOffset(int startOffset, int endOffset) 

Source Link

Usage

From source file:com.zb.mmseg.analysis.TokenUtils.java

License:Open Source License

/**
 * @param input/*  www .  ja v a2 s.co  m*/
 * @param reusableToken is null well new one auto.
 * @return null - if not next token or input is null.
 * @throws IOException
 */
public static Token nextToken(TokenStream input, Token reusableToken) throws IOException {
    if (input == null) {
        return null;
    }
    if (!input.incrementToken()) {
        return null;
    }

    CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = input.getAttribute(TypeAttribute.class);

    if (reusableToken == null) {
        reusableToken = new Token();
    }

    reusableToken.clear();
    if (termAtt != null) {
        // lucene 3.0
        // reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
        // lucene 3.1
        reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    }
    if (offsetAtt != null) {
        // lucene 3.1
        // reusableToken.setStartOffset(offsetAtt.startOffset());
        // reusableToken.setEndOffset(offsetAtt.endOffset());
        // lucene 4.0
        reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    }

    if (typeAtt != null) {
        reusableToken.setType(typeAtt.type());
    }

    return reusableToken;
}

From source file:it.cnr.ilc.lc.clavius.search.ClaviusTokenGroup.java

/**
 * Appends the current token (from the shared offset/term attributes) to this
 * group, widening the group span and — for scoring tokens — the match span.
 */
void addToken(float score) {
    if (numTokens >= MAX_NUM_TOKENS_PER_GROUP) {
        return; // group is full; silently drop further tokens
    }
    final int tokenStart = offsetAtt.startOffset();
    final int tokenEnd = offsetAtt.endOffset();
    if (numTokens == 0) {
        // First token defines both the group span and the match span.
        startOffset = matchStartOffset = tokenStart;
        endOffset = matchEndOffset = tokenEnd;
        tot += score;
    } else {
        startOffset = Math.min(startOffset, tokenStart);
        endOffset = Math.max(endOffset, tokenEnd);
        if (score > 0) {
            if (tot == 0) {
                // First token that actually scores: start the match span here.
                matchStartOffset = tokenStart;
                matchEndOffset = tokenEnd;
            } else {
                matchStartOffset = Math.min(matchStartOffset, tokenStart);
                matchEndOffset = Math.max(matchEndOffset, tokenEnd);
            }
            tot += score;
        }
    }
    Token stored = new Token();
    stored.setOffset(tokenStart, tokenEnd);
    stored.setEmpty().append(termAtt);
    tokens[numTokens] = stored;
    scores[numTokens] = score;
    numTokens++;
}

From source file:jaligner.Sequence.java

License:Open Source License

/**
 * Tokenizes {@code sequence} with the given analyzer and stores up to
 * {@code max_length} tokens.
 *
 * @param sequence   raw text to tokenize
 * @param analyzer   analyzer used to produce the token stream
 * @param max_length maximum number of tokens to retain
 * @throws IOException if tokenization fails
 */
public Sequence(String sequence, Analyzer analyzer, int max_length) throws IOException {
    super();
    this.sequence = sequence;

    TokenStream stream = analyzer.tokenStream("contents", new StringReader(sequence));
    Token.TokenAttributeFactory tokenAttributeFactory = new Token.TokenAttributeFactory(
            stream.getAttributeFactory());

    Vector<Token> tokenVector = new Vector<Token>();
    try {
        // The TokenStream contract requires reset() before the first
        // incrementToken(); omitting it fails on Lucene 4+ streams.
        stream.reset();
        while (stream.incrementToken() && tokenVector.size() < max_length) {
            Token token = (Token) tokenAttributeFactory.createAttributeInstance(Token.class);

            CharTermAttribute charTerm = stream.getAttribute(CharTermAttribute.class);
            OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class);

            // reinit copies the term buffer AND sets both offsets, so the
            // previous redundant setOffset call has been dropped.
            token.reinit(charTerm.buffer(), 0, charTerm.length(), offset.startOffset(), offset.endOffset());

            if (stream.hasAttribute(PositionIncrementAttribute.class)) {
                PositionIncrementAttribute positionIncrement = stream
                        .getAttribute(PositionIncrementAttribute.class);
                token.setPositionIncrement(positionIncrement.getPositionIncrement());
            }

            if (stream.hasAttribute(TypeAttribute.class)) {
                TypeAttribute type = stream.getAttribute(TypeAttribute.class);
                token.setType(type.type());
            }

            tokenVector.add(token);
        }
        stream.end();
    } finally {
        // Release analyzer resources even when tokenization throws.
        stream.close();
    }

    this.tokens = tokenVector.toArray(new Token[tokenVector.size()]);
}

From source file:org.apache.solr.analysis.BufferedTokenStream.java

License:Apache License

/**
 * Old (pre-attribute) API emulation for back compat: snapshots the stream's
 * current attribute state into a standalone {@link Token}.
 *
 * @return the next token, or {@code null} at end of stream
 * @throws IOException if the wrapped stream fails
 */
private Token readToken() throws IOException {
    if (!input.incrementToken()) {
        return null;
    }
    Token tok = new Token();
    tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    tok.setPositionIncrement(posIncAtt.getPositionIncrement());
    tok.setFlags(flagsAtt.getFlags());
    tok.setType(typeAtt.type());
    tok.setPayload(payloadAtt.getPayload());
    return tok;
}

From source file:org.apache.solr.handler.component.SpellCheckComponent.java

License:Apache License

/**
 * Analyzes {@code q} and collects every produced token, copying the standard
 * attributes (term, offset, type, flags, payload, position increment) into
 * standalone {@link Token}s.
 *
 * @param q        the query text to analyze
 * @param analyzer analyzer to run; must not be {@code null}
 * @return tokens in stream order
 * @throws IOException if analysis fails
 */
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    assert analyzer != null;
    Collection<Token> tokens = new ArrayList<Token>();
    TokenStream ts = analyzer.tokenStream("", q);
    try {
        ts.reset();
        // TODO: support custom attributes
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsets = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flags = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payload = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);

        while (ts.incrementToken()) {
            Token tok = new Token();
            tok.copyBuffer(term.buffer(), 0, term.length());
            tok.setOffset(offsets.startOffset(), offsets.endOffset());
            tok.setType(type.type());
            tok.setFlags(flags.getFlags());
            tok.setPayload(payload.getPayload());
            tok.setPositionIncrement(posInc.getPositionIncrement());
            tokens.add(tok);
        }
        ts.end();
        return tokens;
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}

From source file:org.apache.solr.spelling.SimpleQueryConverter.java

License:Apache License

/**
 * Converts the raw query into a set of whitespace-separated spell-check
 * tokens, copying the standard attributes into each {@link Token}.
 *
 * @param origQuery the user's original query string
 * @return the distinct tokens produced by whitespace analysis
 * @throws RuntimeException wrapping any {@link IOException} from analysis
 */
@Override
public Collection<Token> convert(String origQuery) {
    Collection<Token> result = new HashSet<Token>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("", origQuery);
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

        ts.reset();

        while (ts.incrementToken()) {
            Token tok = new Token();
            tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            tok.setFlags(flagsAtt.getFlags());
            tok.setPayload(payloadAtt.getPayload());
            tok.setPositionIncrement(posIncAtt.getPositionIncrement());
            tok.setType(typeAtt.type());
            result.add(tok);
        }
        ts.end();
        return result;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeWhileHandlingException(ts);
        // Analyzer is Closeable in Lucene 4; the original leaked it on
        // every call since it is created per invocation.
        analyzer.close();
    }
}

From source file:org.apache.solr.spelling.SpellingQueryConverter.java

License:Apache License

/**
 * Analyzes {@code text} and appends the resulting tokens to {@code result},
 * shifting every token's offsets by {@code offset} (the text's position in
 * the original query) and overwriting its flags with {@code flagsAttValue}.
 *
 * @param result        destination collection for the produced tokens
 * @param text          the fragment to analyze
 * @param offset        offset of {@code text} within the original query
 * @param flagsAttValue flags value stamped onto every token
 * @throws IOException if analysis fails
 */
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue)
        throws IOException {
    TokenStream stream = analyzer.tokenStream("", text);
    try {
        // TODO: support custom attributes
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
        PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
            token.setFlags(flagsAttValue); //overwriting any flags already set...
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
        }
        stream.end();
    } finally {
        // Previously end()/close() were outside any try/finally, leaking the
        // stream when incrementToken() threw; close it unconditionally.
        stream.close();
    }
}