List of usage examples for org.apache.lucene.analysis.core WhitespaceAnalyzer tokenStream
public final TokenStream tokenStream(final String fieldName, final Reader reader)
Returns a TokenStream suitable for fieldName, tokenizing the contents of reader.
From source file: org.apache.solr.spelling.SimpleQueryConverter.java
License:Apache License
@Override public Collection<Token> convert(String origQuery) { Collection<Token> result = new HashSet<Token>(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); TokenStream ts = null;// w w w .j a va 2s . c o m try { ts = analyzer.tokenStream("", origQuery); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { Token tok = new Token(); tok.copyBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); tok.setPositionIncrement(posIncAtt.getPositionIncrement()); tok.setType(typeAtt.type()); result.add(tok); } ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e); } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.gridkit.coherence.search.lucene.TokenStreamCheck.java
License:Apache License
/**
 * Smoke test: pulls the first token from a whitespace-analyzed stream, then wraps that stream in
 * a {@code CapturedTokenStream} and pulls one token from the wrapper as well.
 *
 * <p>Contains no assertions; it passes as long as none of the calls throw.
 *
 * @throws IOException if a token stream call fails
 */
@Test
public void analyze() throws IOException {
  WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_42);
  analyzer.getOffsetGap("xxx");

  StringReader input = new StringReader("red black tree");
  TokenStream source = analyzer.tokenStream("test", input);
  source.reset();
  source.incrementToken();
  CharTermAttribute sourceTerm = source.getAttribute(CharTermAttribute.class);
  sourceTerm.buffer();

  // Wrap the already-advanced stream and read the term attribute through the wrapper.
  CapturedTokenStream captured = new CapturedTokenStream(source);
  captured.reset();
  captured.incrementToken();
  CharTermAttribute capturedTerm = captured.getAttribute(CharTermAttribute.class);
  capturedTerm.buffer();
}