Example usage for org.apache.lucene.analysis.de GermanAnalyzer getDefaultStopSet

List of usage examples for org.apache.lucene.analysis.de GermanAnalyzer getDefaultStopSet

Introduction

On this page you can find example usages of org.apache.lucene.analysis.de.GermanAnalyzer.getDefaultStopSet().

Prototype

public static final CharArraySet getDefaultStopSet() 

Source Link

Document

Returns the set of default German stopwords.

Usage

From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilder.java

License:Apache License

/**
 * Returns the default stopwords set used by Lucene language analyzer for the specified language.
 *
 * <p>The language name is matched exactly (case-sensitive) against the names listed below. Because the
 * lookup is a {@code switch} on the string, a {@code null} language raises a {@link NullPointerException}.
 *
 * @param language The language for which the stopwords are. The supported languages are English, French, Spanish,
 *                 Portuguese, Italian, Romanian, German, Dutch, Swedish, Norwegian, Danish, Russian, Finnish,
 *                 Irish, Hungarian, Turkish, Armenian, Basque and Catalan.
 * @return The default stopwords set used by Lucene language analyzers, or {@code CharArraySet.EMPTY_SET} if the
 *         language is not one of the supported names.
 */
private static CharArraySet getDefaultStopwords(String language) {
    switch (language) {
    case "English":
        return EnglishAnalyzer.getDefaultStopSet();
    case "French":
        return FrenchAnalyzer.getDefaultStopSet();
    case "Spanish":
        return SpanishAnalyzer.getDefaultStopSet();
    case "Portuguese":
        return PortugueseAnalyzer.getDefaultStopSet();
    case "Italian":
        return ItalianAnalyzer.getDefaultStopSet();
    case "Romanian":
        return RomanianAnalyzer.getDefaultStopSet();
    case "German":
        return GermanAnalyzer.getDefaultStopSet();
    case "Dutch":
        return DutchAnalyzer.getDefaultStopSet();
    case "Swedish":
        return SwedishAnalyzer.getDefaultStopSet();
    case "Norwegian":
        return NorwegianAnalyzer.getDefaultStopSet();
    case "Danish":
        return DanishAnalyzer.getDefaultStopSet();
    case "Russian":
        return RussianAnalyzer.getDefaultStopSet();
    case "Finnish":
        return FinnishAnalyzer.getDefaultStopSet();
    case "Irish":
        return IrishAnalyzer.getDefaultStopSet();
    case "Hungarian":
        return HungarianAnalyzer.getDefaultStopSet();
    case "Turkish":
        // Previously returned the Spanish stop set (copy-paste error). Fully qualified so the fix does not
        // depend on an import that may be missing from the enclosing file.
        return org.apache.lucene.analysis.tr.TurkishAnalyzer.getDefaultStopSet();
    case "Armenian":
        // Previously returned the Spanish stop set (copy-paste error); see the note on "Turkish".
        return org.apache.lucene.analysis.hy.ArmenianAnalyzer.getDefaultStopSet();
    case "Basque":
        return BasqueAnalyzer.getDefaultStopSet();
    case "Catalan":
        return CatalanAnalyzer.getDefaultStopSet();
    default:
        return CharArraySet.EMPTY_SET;
    }
}

From source file:com.stratio.cassandra.lucene.schema.analysis.StandardStopwordsTest.java

License:Apache License

/**
 * Checks that the {@code GERMAN} entry of {@code StandardStopwords} yields the same stop set as
 * {@code GermanAnalyzer.getDefaultStopSet()}.
 */
@Test
public void testGetGermanPreBuiltAnalyzer() {
    // Evaluate the value under test first, then the reference value, as the assertion compares them.
    final CharArraySet actual = StandardStopwords.GERMAN.get();
    final CharArraySet expected = GermanAnalyzer.getDefaultStopSet();
    assertEquals("Expected another stopwords", expected, actual);
}

From source file:ie.cmrc.smtx.lucene.analysis.EuropeanAnalyzer.java

License:Apache License

/**
 * Gets the stop words set for the provided language
 * @param language Two-letter code of a language
 * @return {@code CharArraySet} containing the stop words of the provided language.
 * If the provided language is not supported,then the Lucene standard stop words set
 * if returned./* w  w  w.ja  v  a2s. c  o m*/
 */
protected CharArraySet getStopWordsSet(String language) {
    String lang = language;
    if (lang != null)
        lang = lang.trim().toLowerCase();
    CharArraySet charArraySet = cache.get(lang);
    if (charArraySet == null) {
        if (SUPPORTED_LANGUAGES.contains(lang)) {
            if (lang.equals(LANG_EN)) {
                charArraySet = EnglishAnalyzer.getDefaultStopSet();
            } else if (lang.equals(LANG_FR)) {
                charArraySet = FrenchAnalyzer.getDefaultStopSet();
            } else if (lang.equals(LANG_ES)) {
                charArraySet = SpanishAnalyzer.getDefaultStopSet();
            } else if (lang.equals(LANG_PT)) {
                charArraySet = PortugueseAnalyzer.getDefaultStopSet();
            } else if (lang.equals(LANG_IT)) {
                charArraySet = ItalianAnalyzer.getDefaultStopSet();
            } else if (lang.equals(LANG_DE)) {
                charArraySet = GermanAnalyzer.getDefaultStopSet();
            } else if (lang.equals(LANG_NO)) {
                charArraySet = NorwegianAnalyzer.getDefaultStopSet();
            }
        } else {
            charArraySet = StandardAnalyzer.STOP_WORDS_SET;
        }
        cache.put(lang, charArraySet);
    }
    return charArraySet;
}

From source file:it.unipd.dei.ims.lucene.clef.AnalyzerFactory.java

License:Apache License

/**
 * Builds the stop set to use for the given language.
 *
 * <p>With stopset type {@code "CUSTOM"} (case-insensitive) the stop words are read from the file at
 * {@code stopsetPath}, one trimmed line per stop word. With {@code "DEFAULT"} the default stop set of the
 * Lucene analyzer matching the two-letter {@code language} code is used. Any other stopset type yields
 * {@code CharArraySet.EMPTY_SET}.
 *
 * @param language    two-letter language code; only consulted for the {@code "DEFAULT"} type
 * @param stopsetType {@code "CUSTOM"} or {@code "DEFAULT"}, compared case-insensitively
 * @param stopsetPath path of the stop word file; only consulted for the {@code "CUSTOM"} type
 * @return the resolved stop set
 * @throws Exception                     if the custom stop word file cannot be found; the original
 *                                       {@link FileNotFoundException} is attached as the cause
 * @throws UnsupportedOperationException if the {@code "DEFAULT"} type is requested for a language code
 *                                       that has no branch below
 */
public static CharArraySet createStopset(String language, String stopsetType, String stopsetPath)
        throws Exception {

    CharArraySet stopset = CharArraySet.EMPTY_SET;

    if (stopsetType.equalsIgnoreCase("CUSTOM")) {

        File f = new File(stopsetPath);
        // NOTE(review): second argument is presumably Lucene's ignoreCase flag - confirm against the
        // CharArraySet version in use.
        stopset = new CharArraySet(0, true);

        // try-with-resources closes the scanner (and the underlying file) even if reading or
        // logging throws, which the previous explicit close() did not guarantee.
        try (Scanner sc = new Scanner(f)) {
            logger.debug("STOPLIST:");
            while (sc.hasNextLine()) {
                String stopword = sc.nextLine().trim();
                logger.debug("=> " + stopword);
                stopset.add(stopword);
            }
            logger.debug("");
        } catch (FileNotFoundException e) {
            // Chain the cause instead of printing the stack trace and discarding it.
            throw new Exception("FileNotFoundException when loading stopset", e);
        }

    } else if (stopsetType.equalsIgnoreCase("DEFAULT")) {

        switch (language) {
        case "bg":
            stopset = BulgarianAnalyzer.getDefaultStopSet();
            break;
        case "de":
            stopset = GermanAnalyzer.getDefaultStopSet();
            break;
        case "es":
            stopset = SpanishAnalyzer.getDefaultStopSet();
            break;
        case "fa":
            stopset = PersianAnalyzer.getDefaultStopSet();
            break;
        case "fi":
            stopset = FinnishAnalyzer.getDefaultStopSet();
            break;
        case "fr":
            stopset = FrenchAnalyzer.getDefaultStopSet();
            break;
        case "hu":
            stopset = HungarianAnalyzer.getDefaultStopSet();
            break;
        case "it":
            stopset = ItalianAnalyzer.getDefaultStopSet();
            break;
        case "nl":
            stopset = DutchAnalyzer.getDefaultStopSet();
            break;
        case "pt":
            stopset = PortugueseAnalyzer.getDefaultStopSet();
            break;
        case "ru":
            stopset = RussianAnalyzer.getDefaultStopSet();
            break;
        case "sv":
            stopset = SwedishAnalyzer.getDefaultStopSet();
            break;
        default:
            throw new UnsupportedOperationException("Language not supported yet");
        }

    }

    return stopset;
}

From source file:org.crosswire.jsword.index.lucene.analysis.GermanLuceneAnalyzer.java

License:Open Source License

/**
 * Creates the analyzer and initializes its stop set with the default German stop words
 * provided by {@code GermanAnalyzer.getDefaultStopSet()}.
 */
public GermanLuceneAnalyzer() {
    this.stopSet = GermanAnalyzer.getDefaultStopSet();
}

From source file:org.efaps.esjp.admin.index.AnalyzerProvider_Base.java

License:Apache License

/**
 * Selects the analyzer for a language.
 *
 * <p>{@code "de"} and {@code "es"} get a {@code StandardAnalyzer} built with the German or Spanish
 * default stop set respectively; {@code "en"} and every other value get one built with the English
 * default stop set.
 *
 * @param _companyId the company id (not consulted by this implementation)
 * @param _language the language code used to pick the stop set
 * @return a new {@code StandardAnalyzer} configured with the selected stop set
 */
public Analyzer getAnalyzer(final Long _companyId, final String _language) {
    switch (_language) {
    case "de":
        return new StandardAnalyzer(GermanAnalyzer.getDefaultStopSet());
    case "es":
        return new StandardAnalyzer(SpanishAnalyzer.getDefaultStopSet());
    case "en":
    default:
        // English doubles as the fallback for any unrecognized language code.
        return new StandardAnalyzer(EnglishAnalyzer.getDefaultStopSet());
    }
}

From source file:org.elasticsearch.analysis.common.GermanAnalyzerProvider.java

License:Apache License

GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new GermanAnalyzer(Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),
            Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);/* w  w  w .j a v  a  2s .co m*/
}

From source file:org.elasticsearch.index.analysis.GermanAnalyzerProvider.java

License:Apache License

/**
 * Builds the German analyzer for this provider.
 *
 * <p>Stop words are resolved through {@code Analysis.parseStopWords} with the German default stop set
 * as the default; stem exclusions are resolved through {@code Analysis.parseStemExclusion} with an
 * empty set as the default. {@code version} is passed to both helpers and to the analyzer itself.
 */
@Inject
public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    this.analyzer = new GermanAnalyzer(
            version,
            Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
            Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}

From source file:org.elasticsearch.neolastic.NeolasticAnalyser.java

License:Apache License

/**
 * Assembles the analysis chain: standard tokenizer, lower-casing, removal of the default German stop
 * words, and finally the {@code NeolasticTokenFilter} fed with {@code synonymsService}.
 *
 * @param fieldName name of the field being analyzed (not consulted here)
 * @param reader    source of the text to tokenize
 * @return the components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new StandardTokenizer(version, reader);
    final TokenStream lowerCased = new LowerCaseFilter(version, source);
    final TokenStream withoutStopWords = new StopFilter(version, lowerCased, GermanAnalyzer.getDefaultStopSet());
    final TokenStream sink = new NeolasticTokenFilter(withoutStopWords, synonymsService);
    return new TokenStreamComponents(source, sink);
}

From source file:org.jahia.services.search.analyzer.GermanSnowballAnalyzer.java

License:Open Source License

/**
 * Creates the analyzer by handing the superclass a {@code SnowballAnalyzer} constructed with
 * {@code Version.LUCENE_30}, the stemmer name {@code "German"}, and the default German stop set
 * from {@code GermanAnalyzer.getDefaultStopSet()}.
 */
public GermanSnowballAnalyzer() {
    super(new SnowballAnalyzer(Version.LUCENE_30, "German", GermanAnalyzer.getDefaultStopSet()));
}