Example usage for org.apache.lucene.analysis Token copyBuffer

List of usage examples for org.apache.lucene.analysis Token copyBuffer

Introduction

In this page you can find the example usage for org.apache.lucene.analysis Token copyBuffer.

Prototype

@Override
    public final void copyBuffer(char[] buffer, int offset, int length) 

Source Link

Usage

From source file:com.zb.mmseg.analysis.TokenUtils.java

License:Open Source License

/**
 * Advances the stream and copies the next token's state into a reusable Token.
 *
 * @param input the token stream to read from; may be null
 * @param reusableToken token instance to fill; when null a new Token is created
 * @return the populated token, or null if input is null or has no more tokens
 * @throws IOException if reading from the underlying stream fails
 */
public static Token nextToken(TokenStream input, Token reusableToken) throws IOException {
    if (input == null) {
        return null;
    }
    if (!input.incrementToken()) {
        return null;
    }

    if (reusableToken == null) {
        reusableToken = new Token();
    }
    reusableToken.clear();

    // getAttribute() throws IllegalArgumentException when the attribute is not
    // present, so the original null checks on its result could never fire;
    // hasAttribute() is the correct way to treat each attribute as optional.
    if (input.hasAttribute(CharTermAttribute.class)) {
        CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class);
        // lucene 3.1+: copyBuffer replaces the 3.0 setTermBuffer API
        reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    }
    if (input.hasAttribute(OffsetAttribute.class)) {
        OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class);
        // lucene 4.0: setOffset replaces setStartOffset/setEndOffset
        reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    }
    if (input.hasAttribute(TypeAttribute.class)) {
        reusableToken.setType(input.getAttribute(TypeAttribute.class).type());
    }

    return reusableToken;
}

From source file:eu.socialsensor.framework.client.lucene.TweetTokenizerImpl.java

License:Apache License

/**
 * Fills the given Lucene token with the text of the currently matched token.
 *
 * <p>NOTE(review): zzBuffer/zzStartRead/zzMarkedPos follow JFlex-generated
 * scanner naming — presumably the matched text spans
 * [zzStartRead, zzMarkedPos) within zzBuffer; confirm against the generated
 * scanner this method belongs to.
 *
 * @param t token whose term buffer is overwritten with the matched text
 */
final void getText(Token t) {
    t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}

From source file:org.apache.solr.analysis.BufferedTokenStream.java

License:Apache License

/**
 * Old-API emulation for back compat: pulls the next token from the wrapped
 * stream and snapshots its full attribute state into a standalone Token.
 *
 * @return the next token, or null when the stream is exhausted
 * @throws IOException if the underlying stream fails
 */
private Token readToken() throws IOException {
    if (!input.incrementToken()) {
        return null;
    }
    Token snapshot = new Token();
    snapshot.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    snapshot.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    snapshot.setType(typeAtt.type());
    snapshot.setFlags(flagsAtt.getFlags());
    snapshot.setPositionIncrement(posIncAtt.getPositionIncrement());
    snapshot.setPayload(payloadAtt.getPayload());
    return snapshot;
}

From source file:org.apache.solr.analysis.SlowSynonymMap.java

License:Apache License

/**
 * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
 * the tokens end up at the same position.
 *
 * Example:  [a b] merged with [c d] produces [a/b c/d]  ('/' denotes tokens in the same position)
 * Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]  (a,n means a has posInc=n)
 *
 * @param lst1 first token list; may be null
 * @param lst2 second token list; may be null
 * @return merged list ordered by absolute position; never null
 */
public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
    ArrayList<Token> result = new ArrayList<Token>();
    // If either list is missing there is nothing to interleave: just copy
    // whichever list is present (empty result when both are null).
    if (lst1 == null || lst2 == null) {
        if (lst2 != null)
            result.addAll(lst2);
        if (lst1 != null)
            result.addAll(lst1);
        return result;
    }

    // pos  = absolute position of the last token emitted into result
    // pos1 = absolute position of tok1 within lst1 (cumulative posIncs)
    // pos2 = absolute position of tok2 within lst2 (cumulative posIncs)
    int pos = 0;
    Iterator<Token> iter1 = lst1.iterator();
    Iterator<Token> iter2 = lst2.iterator();
    Token tok1 = iter1.hasNext() ? iter1.next() : null;
    Token tok2 = iter2.hasNext() ? iter2.next() : null;
    int pos1 = tok1 != null ? tok1.getPositionIncrement() : 0;
    int pos2 = tok2 != null ? tok2.getPositionIncrement() : 0;
    while (tok1 != null || tok2 != null) {
        // Drain tokens from lst1 occurring at or before the current lst2 position
        // (or all remaining ones once lst2 is exhausted).
        while (tok1 != null && (pos1 <= pos2 || tok2 == null)) {
            // Copy the token so the input lists are never mutated; the posInc is
            // rewritten relative to the last emitted position so that absolute
            // positions from both lists line up in the merged output.
            Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
            tok.copyBuffer(tok1.buffer(), 0, tok1.length());
            tok.setPositionIncrement(pos1 - pos);
            result.add(tok);
            pos = pos1;
            tok1 = iter1.hasNext() ? iter1.next() : null;
            pos1 += tok1 != null ? tok1.getPositionIncrement() : 0;
        }
        // Symmetric drain from lst2.
        while (tok2 != null && (pos2 <= pos1 || tok1 == null)) {
            Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
            tok.copyBuffer(tok2.buffer(), 0, tok2.length());
            tok.setPositionIncrement(pos2 - pos);
            result.add(tok);
            pos = pos2;
            tok2 = iter2.hasNext() ? iter2.next() : null;
            pos2 += tok2 != null ? tok2.getPositionIncrement() : 0;
        }
    }
    return result;
}

From source file:org.apache.solr.handler.component.SpellCheckComponent.java

License:Apache License

/**
 * Tokenizes {@code q} with the given analyzer and materializes every stream
 * position as an independent Token carrying term text, offsets, type, flags,
 * payload and position increment.
 *
 * @param q the query text to analyze
 * @param analyzer analyzer producing the token stream (must not be null)
 * @return the tokens in stream order
 * @throws IOException if tokenization fails
 */
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> out = new ArrayList<Token>();
    assert analyzer != null;
    TokenStream ts = analyzer.tokenStream("", q);
    try {
        ts.reset();
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

        while (ts.incrementToken()) {
            Token copy = new Token();
            copy.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            copy.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            copy.setType(typeAtt.type());
            copy.setFlags(flagsAtt.getFlags());
            copy.setPayload(payloadAtt.getPayload());
            copy.setPositionIncrement(posIncAtt.getPositionIncrement());
            out.add(copy);
        }
        ts.end();
        return out;
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}

From source file:org.apache.solr.spelling.SimpleQueryConverter.java

License:Apache License

/**
 * Converts the raw query into a set of Tokens by whitespace tokenization,
 * snapshotting each token's full attribute state. Duplicate tokens collapse
 * because the result is a HashSet.
 *
 * @param origQuery the original query string
 * @return the distinct tokens produced from the query
 */
@Override
public Collection<Token> convert(String origQuery) {
    Collection<Token> converted = new HashSet<Token>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("", origQuery);
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

        ts.reset();

        while (ts.incrementToken()) {
            Token copy = new Token();
            copy.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            copy.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            copy.setFlags(flagsAtt.getFlags());
            copy.setPayload(payloadAtt.getPayload());
            copy.setPositionIncrement(posIncAtt.getPositionIncrement());
            copy.setType(typeAtt.type());
            converted.add(copy);
        }
        ts.end();
        return converted;
    } catch (IOException e) {
        // TokenStream I/O failure is unexpected for an in-memory query string.
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}

From source file:org.apache.solr.spelling.SpellingQueryConverter.java

License:Apache License

/**
 * Analyzes {@code text} and appends one Token per emitted term to {@code result}.
 *
 * @param result collection the analyzed tokens are added to
 * @param text the text fragment to analyze
 * @param offset amount added to every token's start/end offset (the position
 *        of {@code text} within the original query)
 * @param flagsAttValue flags value forced onto every produced token
 * @throws IOException if the analyzer's token stream fails
 */
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue)
        throws IOException {
    TokenStream stream = analyzer.tokenStream("", text);
    // TODO: support custom attributes
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    try {
        while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
            token.setFlags(flagsAttValue); //overwriting any flags already set...
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
        }
        stream.end();
    } finally {
        // Always release the stream; the original leaked it whenever
        // incrementToken() or end() threw.
        stream.close();
    }
}

From source file:org.hibernate.search.util.AnalyzerUtils.java

License:Open Source License

/**
 * Runs {@code text} through the analyzer and returns the produced terms as an
 * array of Tokens (only the term text is copied; offsets/types are not).
 *
 * @param analyzer analyzer used to tokenize the text
 * @param field field name handed to the analyzer (may select per-field analysis)
 * @param text the text to analyze
 * @return the emitted tokens, in stream order
 * @throws IOException if tokenization fails
 */
public static Token[] tokensFromAnalysis(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
    List<Token> tokenList = new ArrayList<Token>();
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        // The TokenStream workflow requires reset() before the first
        // incrementToken(); the original skipped it and never released the
        // stream (end()/close() were missing).
        stream.reset();
        while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(term.buffer(), 0, term.length());
            tokenList.add(token);
        }
        stream.end();
    } finally {
        stream.close();
    }
    return tokenList.toArray(new Token[tokenList.size()]);
}