com.redhat.satellite.search.index.ngram.NGramAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for com.redhat.satellite.search.index.ngram.NGramAnalyzer.java

Source

/**
 * Copyright (c) 2008--2015 Red Hat, Inc.
 *
 * This software is licensed to you under the GNU General Public License,
 * version 2 (GPLv2). There is NO WARRANTY for this software, express or
 * implied, including the implied warranties of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
 * along with this software; if not, see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 *
 * Red Hat trademarks are not licensed under GPLv2. No permission is
 * granted to use or replicate Red Hat trademarks that are incorporated
 * in this software or its documentation.
 */

package com.redhat.satellite.search.index.ngram;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.TokenStream;
// From Lucene Sandbox
import org.apache.lucene.analysis.ngram.NGramTokenFilter;

import java.io.Reader;

/**
 * NGramAnalyzer
 * Analyzer whose token stream passes the input through a StandardTokenizer,
 * a StandardFilter and a LowerCaseFilter, and then hands the result to an
 * NGramTokenFilter configured with this analyzer's minimum and maximum
 * ngram sizes.
 * @version $Rev$
 */
public class NGramAnalyzer extends Analyzer {
    // Smallest ngram size handed to the NGramTokenFilter
    protected int min_ngram;
    // Largest ngram size handed to the NGramTokenFilter
    protected int max_ngram;

    /**
     * Constructor; stores the ngram size bounds used by every token stream
     * this analyzer creates.
     * @param min min length of ngram to generate
     * @param max max length of ngram to generate
     */
    public NGramAnalyzer(int min, int max) {
        this.min_ngram = min;
        this.max_ngram = max;
    }

    /**
     * Convenience overload that delegates to
     * {@link #tokenStream(String, Reader)} with a null field name.
     * @param reader contains data to parse
     * @return TokenStream of ngrams
     */
    public TokenStream tokenStream(Reader reader) {
        return this.tokenStream(null, reader);
    }

    /**
     * Builds the filter chain over the supplied reader: standard
     * tokenization, standard filtering, lower-casing, then ngram generation
     * using the configured min/max sizes.
     * @param fieldName ignored param
     * @param reader contains data to parse
     * @return TokenStream of ngrams
     */
    public TokenStream tokenStream(String fieldName, Reader reader) {
        StandardTokenizer tokenizer = new StandardTokenizer(reader);
        StandardFilter standardized = new StandardFilter(tokenizer);
        LowerCaseFilter lowerCased = new LowerCaseFilter(standardized);
        return new NGramTokenFilter(lowerCased, min_ngram, max_ngram);
    }
}