Example usage for the org.apache.lucene.analysis.fa.PersianAnalyzer constructor PersianAnalyzer(CharArraySet)

List of usage examples for the org.apache.lucene.analysis.fa.PersianAnalyzer constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.fa.PersianAnalyzer constructor.

Prototype

public PersianAnalyzer(CharArraySet stopwords) 

Source Link

Document

Builds an analyzer with the given stop words.

Usage

From source file:it.unipd.dei.ims.lucene.clef.AnalyzerFactory.java

License:Apache License

/**
 * Picks the analyzer to use for a language/stemmer combination.
 *
 * <p>If {@code stemmer} equals {@code "NONE"} (compared case-insensitively) a
 * {@code StandardAnalyzer} is returned and {@code language} is not consulted.
 * Otherwise the language-specific analyzer matching the language code is built.
 *
 * @param language language code; one of bg, de, es, fa, fi, fr, hu, it, nl, pt, ru, sv
 * @param stemmer  stemmer name; {@code "NONE"} selects the {@code StandardAnalyzer}
 * @param stopset  stop words passed to whichever analyzer is constructed
 * @return the newly constructed analyzer
 * @throws UnsupportedOperationException if {@code stemmer} is not {@code "NONE"} and
 *         {@code language} is not one of the codes listed above
 */
public static Analyzer createAnalyzer(String language, String stemmer, CharArraySet stopset) {

    // No stemming requested: the language code is irrelevant.
    if (stemmer.equalsIgnoreCase("NONE")) {
        return new StandardAnalyzer(stopset);
    }

    // Otherwise dispatch on the language code to the language-specific analyzer.
    switch (language) {
    case "bg":
        return new BulgarianAnalyzer(stopset);
    case "de":
        return new GermanAnalyzer(stopset);
    case "es":
        return new SpanishAnalyzer(stopset);
    case "fa":
        return new PersianAnalyzer(stopset);
    case "fi":
        return new FinnishAnalyzer(stopset);
    case "fr":
        return new FrenchAnalyzer(stopset);
    case "hu":
        return new HungarianAnalyzer(stopset);
    case "it":
        return new ItalianAnalyzer(stopset);
    case "nl":
        return new DutchAnalyzer(stopset);
    case "pt":
        return new PortugueseAnalyzer(stopset);
    case "ru":
        return new RussianAnalyzer(stopset);
    case "sv":
        return new SwedishAnalyzer(stopset);
    default:
        throw new UnsupportedOperationException("Language not supported yet");
    }

}

From source file:org.apache.jackrabbit.core.query.lucene.LanguageCustomizingAnalyzerRegistry.java

License:Open Source License

public LanguageCustomizingAnalyzerRegistry(IndexingConfiguration configuration) {
    this.configuration = configuration;

    languageToAnalyzer.put("ar", new AnalyzerWrapper(new ArabicAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("br", new AnalyzerWrapper(new BrazilianAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("cjk", new AnalyzerWrapper(new CJKAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("cn", new AnalyzerWrapper(new ChineseAnalyzer(), true));
    languageToAnalyzer.put("cz", new AnalyzerWrapper(new CzechAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("de", new AnalyzerWrapper(new GermanAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("el", new AnalyzerWrapper(new GreekAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("en", new AnalyzerWrapper(
            new SnowballAnalyzer(Version.LUCENE_30, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET), true));
    languageToAnalyzer.put("fa", new AnalyzerWrapper(new PersianAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("fr", new AnalyzerWrapper(new FrenchAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("nl", new AnalyzerWrapper(new DutchAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("ru", new AnalyzerWrapper(new RussianAnalyzer(Version.LUCENE_30), true));
    languageToAnalyzer.put("th", new AnalyzerWrapper(new ThaiAnalyzer(Version.LUCENE_30), true));
}

From source file:org.elasticsearch.analysis.common.PersianAnalyzerProvider.java

License:Apache License

/**
 * Creates the provider and builds its {@code PersianAnalyzer}.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           passed to {@code Analysis.parseStopWords}
 * @param name          forwarded to the superclass constructor
 * @param settings      forwarded to the superclass constructor and to
 *                      {@code Analysis.parseStopWords}
 */
PersianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    // The stop set is resolved by Analysis.parseStopWords, which receives the
    // analyzer's default stop set as its third argument.
    this.analyzer = new PersianAnalyzer(
            Analysis.parseStopWords(env, settings, PersianAnalyzer.getDefaultStopSet()));
    this.analyzer.setVersion(version);
}

From source file:org.omegat.tokenizer.LucenePersianTokenizer.java

License:Open Source License

/**
 * Builds the token stream for a piece of text.
 *
 * <p>Without stemming the text is tokenized by a plain {@code StandardTokenizer}.
 * With stemming a {@code PersianAnalyzer} is used: the single-argument constructor
 * when {@code stopWordsAllowed} is true, otherwise the constructor that is given an
 * empty stop-word array.
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     whether to go through the {@code PersianAnalyzer}
 * @param stopWordsAllowed selects which {@code PersianAnalyzer} constructor is used
 * @return the token stream over {@code strOrig}
 */
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) {
    // Non-stemming path: plain tokenizer, no analyzer involved.
    if (!stemsAllowed) {
        return new StandardTokenizer(getBehavior(), new StringReader(strOrig));
    }

    final PersianAnalyzer analyzer;
    if (stopWordsAllowed) {
        analyzer = new PersianAnalyzer(getBehavior());
    } else {
        // Explicitly empty stop-word list.
        analyzer = new PersianAnalyzer(getBehavior(), new String[0]);
    }
    return analyzer.tokenStream("", new StringReader(strOrig));
}

From source file:perLucene.Server.java

License:Open Source License

/**
 * Replaces {@code ha} with a fresh map and fills it with one analyzer per
 * language key, all constructed with {@code Version.LUCENE_41}.
 */
private static void initAnalyzers() {

    // Single Lucene version shared by every analyzer registered below.
    final Version matchVersion = Version.LUCENE_41;

    ha = new HashMap<String, Analyzer>();

    ha.put("ar", new ArabicAnalyzer(matchVersion));
    ha.put("el", new GreekAnalyzer(matchVersion));
    ha.put("bg", new BulgarianAnalyzer(matchVersion));
    ha.put("br", new BrazilianAnalyzer(matchVersion));
    ha.put("ca", new CatalanAnalyzer(matchVersion));
    ha.put("cz", new CzechAnalyzer(matchVersion));
    ha.put("da", new DanishAnalyzer(matchVersion));
    ha.put("de", new GermanAnalyzer(matchVersion));
    ha.put("en", new EnglishAnalyzer(matchVersion));
    ha.put("es", new SpanishAnalyzer(matchVersion));
    ha.put("eu", new BasqueAnalyzer(matchVersion));
    ha.put("fa", new PersianAnalyzer(matchVersion));
    ha.put("fi", new FinnishAnalyzer(matchVersion));
    ha.put("fr", new FrenchAnalyzer(matchVersion));
    ha.put("ga", new IrishAnalyzer(matchVersion));
    ha.put("gl", new GalicianAnalyzer(matchVersion));
    ha.put("hi", new HindiAnalyzer(matchVersion));
    ha.put("hu", new HungarianAnalyzer(matchVersion));
    ha.put("hy", new ArmenianAnalyzer(matchVersion));
    ha.put("id", new IndonesianAnalyzer(matchVersion));
    ha.put("it", new ItalianAnalyzer(matchVersion));
    ha.put("lv", new LatvianAnalyzer(matchVersion));
    ha.put("nl", new DutchAnalyzer(matchVersion));
    ha.put("no", new NorwegianAnalyzer(matchVersion));
    ha.put("pt", new PortugueseAnalyzer(matchVersion));
    ha.put("ro", new RomanianAnalyzer(matchVersion));
    ha.put("ru", new RussianAnalyzer(matchVersion));
    ha.put("sv", new SwedishAnalyzer(matchVersion));
    ha.put("th", new ThaiAnalyzer(matchVersion));
    ha.put("tr", new TurkishAnalyzer(matchVersion));
    // Chinese is registered under the key "cn".
    ha.put("cn", new SmartChineseAnalyzer(matchVersion));

}