List of usage examples for the org.apache.lucene.analysis.shingle.ShingleFilterFactory constructor
public ShingleFilterFactory(Map<String, String> args)
From source file: yasoco.ShingleAnalyzer.java
@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { TokenStream result = null;// w w w.j av a 2s .c o m Tokenizer source = new UAX29URLEmailTokenizer(Version.LUCENE_46, reader); Map<String, String> shingleFilterParams = new HashMap<>(); int minShingleSize = Integer.parseInt(prop.getProperty("minShingleSize", "2")); int maxShingleSize = Integer.parseInt(prop.getProperty("maxShingleSize", "3")); if (minShingleSize == 1 || maxShingleSize < minShingleSize) { // we don't want n-gram indexing result = source; } else { shingleFilterParams.put("minShingleSize", String.valueOf(minShingleSize)); shingleFilterParams.put("maxShingleSize", String.valueOf(maxShingleSize)); shingleFilterParams.put("tokenSeparator", "#"); // looks good in luke shingleFilterParams.put("outputUnigrams", "true"); shingleFilterParams.put("outputUnigramsIfNoShingles", "true"); result = new ShingleFilterFactory(shingleFilterParams).create(source); } return new TokenStreamComponents(source, result); }