Example usage for org.apache.lucene.analysis.core WhitespaceAnalyzer tokenStream

List of usage examples for org.apache.lucene.analysis.core WhitespaceAnalyzer tokenStream

Introduction

This page collects example usages of the tokenStream method of org.apache.lucene.analysis.core.WhitespaceAnalyzer.

Prototype

public final TokenStream tokenStream(final String fieldName, final Reader reader) 

Source Link

Document

Returns a TokenStream suitable for fieldName, tokenizing the contents of reader.

Usage

From source file:org.apache.solr.spelling.SimpleQueryConverter.java

License:Apache License

/**
 * Tokenizes {@code origQuery} with a {@link WhitespaceAnalyzer} and returns one
 * {@link Token} per term emitted by the analyzer.
 *
 * <p>Each token copies the term text, offsets, flags, payload, position increment and
 * type from the stream's attributes. Tokens are gathered in a {@link HashSet}, so the
 * returned collection has no defined iteration order.
 *
 * @param origQuery the raw query text to tokenize
 * @return the tokens produced for {@code origQuery}
 * @throws RuntimeException wrapping any {@link IOException} raised while consuming the stream
 */
@Override
public Collection<Token> convert(String origQuery) {
    Collection<Token> result = new HashSet<Token>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("", origQuery);
        // TODO: support custom attributes
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
        FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

        // The stream must be reset before the first incrementToken() call.
        ts.reset();

        while (ts.incrementToken()) {
            // The attribute instances are shared across iterations, so snapshot
            // their current values into a fresh Token for every term.
            Token tok = new Token();
            tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            tok.setFlags(flagsAtt.getFlags());
            tok.setPayload(payloadAtt.getPayload());
            tok.setPositionIncrement(posIncAtt.getPositionIncrement());
            tok.setType(typeAtt.type());
            result.add(tok);
        }
        ts.end();
        return result;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Release the stream first, then the analyzer that produced it. The analyzer
        // is Closeable as well and was previously leaked on every call.
        IOUtils.closeWhileHandlingException(ts, analyzer);
    }
}

From source file:org.gridkit.coherence.search.lucene.TokenStreamCheck.java

License:Apache License

/**
 * Smoke test: drives a whitespace-analyzed token stream, and then a
 * {@code CapturedTokenStream} wrapping it, through reset / incrementToken /
 * term-buffer access. There are no assertions; the test passes as long as
 * none of the calls throws.
 */
@Test
public void analyze() throws IOException {
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_42);
    // Return value intentionally ignored; the call is only exercised.
    analyzer.getOffsetGap("xxx");

    // Step the plain analyzer stream to its first token and touch the term buffer.
    StringReader input = new StringReader("red black tree");
    TokenStream source = analyzer.tokenStream("test", input);
    source.reset();
    source.incrementToken();
    CharTermAttribute sourceTerm = source.getAttribute(CharTermAttribute.class);
    sourceTerm.buffer();

    // Repeat the same sequence on the capturing wrapper around that stream.
    CapturedTokenStream captured = new CapturedTokenStream(source);
    captured.reset();
    captured.incrementToken();
    CharTermAttribute capturedTerm = captured.getAttribute(CharTermAttribute.class);
    capturedTerm.buffer();
}