Example usage for org.apache.lucene.analysis.ngram NGramTokenizer DEFAULT_MIN_NGRAM_SIZE

List of usage examples for org.apache.lucene.analysis.ngram NGramTokenizer DEFAULT_MIN_NGRAM_SIZE

Introduction

On this page you can find example usages of org.apache.lucene.analysis.ngram NGramTokenizer DEFAULT_MIN_NGRAM_SIZE.

Prototype

int DEFAULT_MIN_NGRAM_SIZE

To view the source code for org.apache.lucene.analysis.ngram NGramTokenizer DEFAULT_MIN_NGRAM_SIZE, click Source Link.

Usage

From source file:org.apache.solr.analysis.NGramTokenizerFactory.java

License:Apache License

/**
 * Reads the optional {@code maxGramSize} and {@code minGramSize} arguments and stores them,
 * falling back to the {@link NGramTokenizer} defaults for whichever argument is absent.
 *
 * @param args factory arguments; a present value is parsed with {@link Integer#parseInt(String)}
 * @throws NumberFormatException if a present value is not a parsable integer
 */
@Override
public void init(Map<String, String> args) {
    super.init(args);

    // The maximum is resolved before the minimum, so a malformed maximum is reported first.
    final String configuredMax = args.get("maxGramSize");
    if (configuredMax == null) {
        maxGramSize = NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE;
    } else {
        maxGramSize = Integer.parseInt(configuredMax);
    }

    final String configuredMin = args.get("minGramSize");
    if (configuredMin == null) {
        minGramSize = NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE;
    } else {
        minGramSize = Integer.parseInt(configuredMin);
    }
}

From source file:org.elasticsearch.analysis.common.EdgeNGramTokenizerFactory.java

License:Apache License

/**
 * Creates the factory from tokenizer settings.
 *
 * <p>Reads {@code min_gram} and {@code max_gram} (defaulting to the {@link NGramTokenizer}
 * constants) and builds the character matcher from the {@code token_chars} list.
 *
 * @param indexSettings index-level settings, forwarded to the superclass
 * @param environment   not used by this constructor
 * @param name          not used by this constructor
 * @param settings      tokenizer settings to read from
 */
EdgeNGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name,
        Settings settings) {
    super(indexSettings, settings);

    // Gram bounds: explicit setting wins, otherwise the Lucene NGramTokenizer defaults apply.
    minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);

    matcher = parseTokenChars(settings.getAsList("token_chars"));
}

From source file:org.elasticsearch.analysis.common.NGramTokenizerFactory.java

License:Apache License

/**
 * Creates the factory from tokenizer settings and checks the configured gram range against the
 * index-level maximum n-gram difference.
 *
 * <p>Reads {@code min_gram} and {@code max_gram} (defaulting to the {@link NGramTokenizer}
 * constants). When {@code max_gram - min_gram} exceeds {@code indexSettings.getMaxNgramDiff()},
 * indices created on or after {@code Version.V_7_0_0_alpha1} are rejected, while older indices
 * only get a deprecation log entry.
 *
 * @param indexSettings index-level settings; supplies the allowed difference and creation version
 * @param environment   not used by this constructor
 * @param name          not used by this constructor
 * @param settings      tokenizer settings to read from
 * @throws IllegalArgumentException if the gram difference is above the allowed maximum and the
 *                                  index was created on or after {@code V_7_0_0_alpha1}
 */
NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, settings);
    int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
    this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
    int ngramDiff = maxGram - minGram;
    if (ngramDiff > maxAllowedNgramDiff) {
        if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1)) {
            throw new IllegalArgumentException(
                    "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
                            + maxAllowedNgramDiff + "] but was [" + ngramDiff
                            + "]. This limit can be set by changing the ["
                            + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.");
        } else {
            // Fixed: the two message fragments were previously joined without a space
            // ("...NGram Tokenizer,expected difference...").
            deprecationLogger
                    .deprecated("Deprecated big difference between max_gram and min_gram in NGram Tokenizer, "
                            + "expected difference must be less than or equal to: [" + maxAllowedNgramDiff
                            + "]");
        }
    }
    this.matcher = parseTokenChars(settings.getAsList("token_chars"));
}

From source file:org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory.java

License:Apache License

@Inject
public EdgeNGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name,
        @Assisted Settings settings) {// w w w . java2s . c om
    super(index, indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
    this.side = Lucene43EdgeNGramTokenizer.Side
            .getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
    this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
    this.esVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED,
            org.elasticsearch.Version.CURRENT);
}

From source file:org.elasticsearch.index.analysis.NGramTokenizerFactory.java

License:Apache License

@Inject
public NGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name,
        @Assisted Settings settings) {/*w w w  .j  a  v  a 2s  .c o  m*/
    super(index, indexSettings, name, settings);
    this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
    this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
    this.esVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED,
            org.elasticsearch.Version.CURRENT);
}