Example usage for org.apache.lucene.analysis.ar ArabicNormalizationFilter ArabicNormalizationFilter

List of usage examples for org.apache.lucene.analysis.ar ArabicNormalizationFilter ArabicNormalizationFilter

Introduction

On this page you can find example usage for org.apache.lucene.analysis.ar ArabicNormalizationFilter ArabicNormalizationFilter.

Prototype

public ArabicNormalizationFilter(TokenStream input) 

Source Link

Usage

From source file:org.apache.solr.analysis.ArabicNormalizationFilterFactory.java

License:Apache License

/**
 * Wraps the given stream with an {@link ArabicNormalizationFilter}.
 *
 * @param input the token stream to normalize
 * @return an Arabic-normalizing filter over {@code input}
 */
public ArabicNormalizationFilter create(TokenStream input) {
    final ArabicNormalizationFilter normalized = new ArabicNormalizationFilter(input);
    return normalized;
}

From source file:org.crosswire.jsword.index.lucene.analysis.ArabicLuceneAnalyzer.java

License:Open Source License

/**
 * Builds the Arabic analysis chain for a single use: letter tokenizing,
 * lower-casing, Arabic normalization, then optional stop-word removal and
 * optional stemming.
 *
 * @param fieldName the field being analyzed (unused by this chain)
 * @param reader    the source text
 * @return the fully filtered token stream
 */
@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new ArabicLetterTokenizer(reader);
    result = new LowerCaseFilter(result);
    result = new ArabicNormalizationFilter(result);
    if (doStopWords && stopSet != null) {
        // Use the version-dependent position-increment default so this
        // chain matches reusableTokenStream() in the same class, which
        // already passes getEnablePositionIncrementsVersionDefault.
        result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result,
                stopSet);
    }

    if (doStemming) {
        result = new ArabicStemFilter(result);
    }

    return result;
}

From source file:org.crosswire.jsword.index.lucene.analysis.ArabicLuceneAnalyzer.java

License:Open Source License

/**
 * Returns a (possibly reused) token stream with the same filter chain as
 * {@code tokenStream}: letter tokenizing, lower-casing, Arabic
 * normalization, then optional stop-word removal and optional stemming.
 * The built chain is cached via {@code setPreviousTokenStream} and only
 * the tokenizer source is reset on subsequent calls.
 *
 * @param fieldName the field being analyzed (unused by this chain)
 * @param reader    the source text
 * @return the cached or newly built filtered token stream
 * @throws IOException if resetting the reused tokenizer fails
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        // First use on this thread: build and cache the full chain.
        streams = new SavedStreams(new ArabicLetterTokenizer(reader));
        streams.setResult(new LowerCaseFilter(streams.getResult()));
        streams.setResult(new ArabicNormalizationFilter(streams.getResult()));

        if (doStopWords && stopSet != null) {
            streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                    streams.getResult(), stopSet));
        }

        if (doStemming) {
            streams.setResult(new ArabicStemFilter(streams.getResult()));
        }

        setPreviousTokenStream(streams);
    } else {
        // Reuse the cached chain; just point the tokenizer at the new input.
        streams.getSource().reset(reader);
    }
    return streams.getResult();
}

From source file:org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer.java

License:Open Source License

@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new ArabicLetterTokenizer(reader);
    result = new LowerCaseFilter(result);
    result = new ArabicNormalizationFilter(result);
    /* additional persian-specific normalization */
    result = new PersianNormalizationFilter(result);
    /*/*from w w w. ja v  a  2 s . c om*/
     * the order here is important: the stop set is normalized with the
     * above!
     */
    if (doStopWords && stopSet != null) {
        result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result,
                stopSet);
    }

    return result;
}

From source file:org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer.java

License:Open Source License

/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
 * text in the provided {@link Reader}./*from w w w .j av  a 2 s .  com*/
 * 
 * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
 *         filtered with {@link LowerCaseFilter},
 *         {@link ArabicNormalizationFilter},
 *         {@link PersianNormalizationFilter} and Persian Stop words
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams(new ArabicLetterTokenizer(reader));
        streams.setResult(new LowerCaseFilter(streams.getResult()));
        streams.setResult(new ArabicNormalizationFilter(streams.getResult()));
        /* additional persian-specific normalization */
        streams.setResult(new PersianNormalizationFilter(streams.getResult()));
        /*
         * the order here is important: the stop set is normalized with the
         * above!
         */
        if (doStopWords && stopSet != null) {
            streams.setResult(new StopFilter(false, streams.getResult(), stopSet));
        }
        setPreviousTokenStream(streams);
    } else {
        streams.getSource().reset(reader);
    }
    return streams.getResult();
}

From source file:org.elasticsearch.analysis.common.ArabicNormalizationFilterFactory.java

License:Apache License

/**
 * Wraps the given stream with an {@link ArabicNormalizationFilter}.
 *
 * @param tokenStream the token stream to normalize
 * @return an Arabic-normalizing filter over {@code tokenStream}
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream normalized = new ArabicNormalizationFilter(tokenStream);
    return normalized;
}