Usage examples for the org.apache.lucene.analysis.cjk.CJKBigramFilter constructor
public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams)
From source file:org.elasticsearch.analysis.common.CJKBigramFilterFactory.java
License:Apache License
@Override public TokenStream create(TokenStream tokenStream) { CJKBigramFilter filter = new CJKBigramFilter(tokenStream, flags, outputUnigrams); if (outputUnigrams) { /**// w ww. j av a 2 s . c o m * We disable the graph analysis on this token stream * because it produces bigrams AND unigrams. * Graph analysis on such token stream is useless and dangerous as it may create too many paths * since shingles of different size are not aligned in terms of positions. */ filter.addAttribute(DisableGraphAttribute.class); } return filter; }
From source file:org.elasticsearch.index.analysis.CJKBigramFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in a {@link CJKBigramFilter} configured with this
 * factory's {@code flags} and {@code outputUnigrams} settings.
 *
 * @param tokenStream the upstream token stream to wrap
 * @return the newly created bigram filter
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final CJKBigramFilter bigramFilter =
            new CJKBigramFilter(tokenStream, flags, outputUnigrams);
    return bigramFilter;
}
From source file:org.tallison.lucene.search.concordance.ConcordanceTestBase.java
License:Apache License
/**
 * Builds an {@link Analyzer} that tokenizes with a {@link StandardTokenizer}
 * and then applies a {@link CJKBigramFilter} with every script flag enabled.
 *
 * <p>The analyzer reports a position increment gap and an offset gap of 10
 * for every field.
 *
 * @param outputUnigrams passed through to the {@link CJKBigramFilter}
 *                       constructor's {@code outputUnigrams} argument
 * @return a new analyzer instance
 */
public static Analyzer getCJKBigramAnalyzer(final boolean outputUnigrams) {
    // Named flags instead of the bare literal 15:
    // HAN (1) | HIRAGANA (2) | KATAKANA (4) | HANGUL (8) == 15.
    final int allScripts = CJKBigramFilter.HAN
            | CJKBigramFilter.HIRAGANA
            | CJKBigramFilter.KATAKANA
            | CJKBigramFilter.HANGUL;
    // Same gap is used for both positions and offsets.
    final int gap = 10;

    return new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new StandardTokenizer();
            TokenFilter filter = new CJKBigramFilter(tokenizer, allScripts, outputUnigrams);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        public int getPositionIncrementGap(String fieldName) {
            return gap;
        }

        @Override
        public int getOffsetGap(String fieldName) {
            return gap;
        }
    };
}