List of usage examples for org.apache.lucene.analysis.ngram NGramTokenizer DEFAULT_MIN_NGRAM_SIZE
int DEFAULT_MIN_NGRAM_SIZE
To view the source code for org.apache.lucene.analysis.ngram NGramTokenizer DEFAULT_MIN_NGRAM_SIZE, click the Source Link.
From source file:org.apache.solr.analysis.NGramTokenizerFactory.java
License:Apache License
/** Initializes the n-gram min and max sizes and the side from which one should start tokenizing. */ @Override// w w w .j av a2 s. co m public void init(Map<String, String> args) { super.init(args); String maxArg = args.get("maxGramSize"); maxGramSize = (maxArg != null ? Integer.parseInt(maxArg) : NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE); String minArg = args.get("minGramSize"); minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE); }
From source file:org.elasticsearch.analysis.common.EdgeNGramTokenizerFactory.java
License:Apache License
/**
 * Creates the edge n-gram tokenizer factory from the tokenizer settings.
 *
 * <p>Reads {@code min_gram} and {@code max_gram}, passing the {@code NGramTokenizer} default
 * constants as fallback values, and builds the character matcher from {@code token_chars}.
 *
 * @param indexSettings index-level settings, forwarded to the superclass constructor
 * @param environment not referenced by this constructor
 * @param name not referenced by this constructor
 * @param settings tokenizer settings supplying {@code min_gram}, {@code max_gram} and
 *        {@code token_chars}
 */
EdgeNGramTokenizerFactory(
        IndexSettings indexSettings,
        Environment environment,
        String name,
        Settings settings) {
    super(indexSettings, settings);

    // Gram size bounds.
    minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);

    // Character classes that may appear inside a token.
    matcher = parseTokenChars(settings.getAsList("token_chars"));
}
From source file:org.elasticsearch.analysis.common.NGramTokenizerFactory.java
License:Apache License
/**
 * Creates the n-gram tokenizer factory from the tokenizer settings.
 *
 * <p>Reads {@code min_gram} and {@code max_gram} (passing the {@code NGramTokenizer} default
 * constants as fallback values) and compares their difference with
 * {@code indexSettings.getMaxNgramDiff()}. When the difference is larger, an index whose creation
 * version is on or after {@code Version.V_7_0_0_alpha1} is rejected with an exception; for any
 * other index only a deprecation message is logged.
 *
 * @param indexSettings index-level settings; source of the allowed n-gram difference and the
 *        index creation version
 * @param environment not referenced by this constructor
 * @param name not referenced by this constructor
 * @param settings tokenizer settings supplying {@code min_gram}, {@code max_gram} and
 *        {@code token_chars}
 * @throws IllegalArgumentException if the gram difference exceeds the allowed maximum on an index
 *         created on or after {@code Version.V_7_0_0_alpha1}
 */
NGramTokenizerFactory(
        IndexSettings indexSettings,
        Environment environment,
        String name,
        Settings settings) {
    super(indexSettings, settings);

    final int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
    minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);

    final int ngramDiff = maxGram - minGram;
    final boolean diffTooLarge = ngramDiff > maxAllowedNgramDiff;

    // The index version is consulted only when the limit is actually exceeded.
    if (diffTooLarge && indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1)) {
        throw new IllegalArgumentException(
            "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
                + maxAllowedNgramDiff
                + "] but was ["
                + ngramDiff
                + "]. This limit can be set by changing the ["
                + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
                + "] index level setting.");
    }

    if (diffTooLarge) {
        // Reached only when the exception above was not thrown: warn instead of failing.
        // The message text (including the missing space after the comma) is kept as-is.
        deprecationLogger.deprecated(
            "Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
                + "expected difference must be less than or equal to: ["
                + maxAllowedNgramDiff
                + "]");
    }

    matcher = parseTokenChars(settings.getAsList("token_chars"));
}
From source file:org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory.java
License:Apache License
/**
 * Creates the edge n-gram tokenizer factory from the tokenizer settings.
 *
 * <p>Reads {@code min_gram} and {@code max_gram} (passing the {@code NGramTokenizer} default
 * constants as fallback values), resolves the tokenizing side from the {@code side} setting
 * (passing the label of {@code Lucene43EdgeNGramTokenizer.DEFAULT_SIDE} as fallback), builds the
 * character matcher from {@code token_chars}, and records the version read from
 * {@code IndexMetaData.SETTING_VERSION_CREATED} in the index settings (passing
 * {@code Version.CURRENT} as fallback).
 *
 * @param index forwarded to the superclass constructor
 * @param indexSettings index-level settings; also the source of the creation version
 * @param name forwarded to the superclass constructor
 * @param settings tokenizer settings supplying {@code min_gram}, {@code max_gram}, {@code side}
 *        and {@code token_chars}
 */
@Inject
public EdgeNGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
    this.side = Lucene43EdgeNGramTokenizer.Side
            .getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
    this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
    this.esVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED,
            org.elasticsearch.Version.CURRENT);
}
From source file:org.elasticsearch.index.analysis.NGramTokenizerFactory.java
License:Apache License
/**
 * Creates the n-gram tokenizer factory from the tokenizer settings.
 *
 * <p>Reads {@code min_gram} and {@code max_gram} (passing the {@code NGramTokenizer} default
 * constants as fallback values), builds the character matcher from {@code token_chars}, and
 * records the version read from {@code IndexMetaData.SETTING_VERSION_CREATED} in the index
 * settings (passing {@code Version.CURRENT} as fallback).
 *
 * @param index forwarded to the superclass constructor
 * @param indexSettings index-level settings; also the source of the creation version
 * @param name forwarded to the superclass constructor
 * @param settings tokenizer settings supplying {@code min_gram}, {@code max_gram} and
 *        {@code token_chars}
 */
@Inject
public NGramTokenizerFactory(
        Index index,
        @IndexSettings Settings indexSettings,
        @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    // Gram size bounds.
    minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);

    // Character classes that may appear inside a token.
    matcher = parseTokenChars(settings.getAsArray("token_chars"));

    // Version associated with the index settings.
    esVersion = indexSettings.getAsVersion(
            IndexMetaData.SETTING_VERSION_CREATED,
            org.elasticsearch.Version.CURRENT);
}