Example usage for org.apache.lucene.analysis.nl.DutchAnalyzer.getDefaultStopSet()

List of usage examples for org.apache.lucene.analysis.nl.DutchAnalyzer.getDefaultStopSet()

Introduction

On this page you can find usage examples for org.apache.lucene.analysis.nl.DutchAnalyzer.getDefaultStopSet().

Prototype

public static CharArraySet getDefaultStopSet() 

Source Link

Document

Returns an unmodifiable instance of the default stop-words set.

Usage

From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilder.java

License:Apache License

/**
 * Returns the default stopwords set used by the Lucene language analyzer for the specified language.
 *
 * <p>The language name is matched case-sensitively against the capitalized English names listed below.
 *
 * @param language The language for which the stopwords are. The supported languages are English, French, Spanish,
 *                 Portuguese, Italian, Romanian, German, Dutch, Swedish, Norwegian, Danish, Russian, Finnish,
 *                 Irish, Hungarian, Turkish, Armenian, Basque and Catalan.
 * @return The default stopwords set of the matching Lucene language analyzer, or
 *         {@link CharArraySet#EMPTY_SET} if the language is not one of the supported names.
 */
private static CharArraySet getDefaultStopwords(String language) {
    switch (language) {
    case "English":
        return EnglishAnalyzer.getDefaultStopSet();
    case "French":
        return FrenchAnalyzer.getDefaultStopSet();
    case "Spanish":
        return SpanishAnalyzer.getDefaultStopSet();
    case "Portuguese":
        return PortugueseAnalyzer.getDefaultStopSet();
    case "Italian":
        return ItalianAnalyzer.getDefaultStopSet();
    case "Romanian":
        return RomanianAnalyzer.getDefaultStopSet();
    case "German":
        return GermanAnalyzer.getDefaultStopSet();
    case "Dutch":
        return DutchAnalyzer.getDefaultStopSet();
    case "Swedish":
        return SwedishAnalyzer.getDefaultStopSet();
    case "Norwegian":
        return NorwegianAnalyzer.getDefaultStopSet();
    case "Danish":
        return DanishAnalyzer.getDefaultStopSet();
    case "Russian":
        return RussianAnalyzer.getDefaultStopSet();
    case "Finnish":
        return FinnishAnalyzer.getDefaultStopSet();
    case "Irish":
        return IrishAnalyzer.getDefaultStopSet();
    case "Hungarian":
        return HungarianAnalyzer.getDefaultStopSet();
    case "Turkish":
        // Previously returned the Spanish stop set (copy-paste error). Fully qualified so the
        // fix does not depend on an import that may be missing from the file.
        return org.apache.lucene.analysis.tr.TurkishAnalyzer.getDefaultStopSet();
    case "Armenian":
        // Previously returned the Spanish stop set (copy-paste error).
        return org.apache.lucene.analysis.hy.ArmenianAnalyzer.getDefaultStopSet();
    case "Basque":
        return BasqueAnalyzer.getDefaultStopSet();
    case "Catalan":
        return CatalanAnalyzer.getDefaultStopSet();
    default:
        return CharArraySet.EMPTY_SET;
    }
}

From source file:com.stratio.cassandra.lucene.schema.analysis.StandardStopwordsTest.java

License:Apache License

/**
 * Checks that {@code StandardStopwords.DUTCH} yields the same stop-word set as
 * {@link DutchAnalyzer#getDefaultStopSet()}.
 */
@Test
public void testGetDutchPreBuiltAnalyzer() {
    // Resolve the value under test first, then the reference value from Lucene.
    final CharArraySet actual = StandardStopwords.DUTCH.get();
    final CharArraySet expected = DutchAnalyzer.getDefaultStopSet();

    assertEquals("Expected another stopwords", expected, actual);
}

From source file:it.unipd.dei.ims.lucene.clef.AnalyzerFactory.java

License:Apache License

/**
 * Creates the stop-word set to use for the given language.
 *
 * <ul>
 *   <li>{@code stopsetType} equal to "CUSTOM" (ignoring case): reads {@code stopsetPath} line by line and adds
 *       every trimmed line to a new case-insensitive {@link CharArraySet}.</li>
 *   <li>{@code stopsetType} equal to "DEFAULT" (ignoring case): returns the default stop set of the Lucene
 *       analyzer matching the two-letter {@code language} code.</li>
 *   <li>Any other {@code stopsetType}: returns {@link CharArraySet#EMPTY_SET}.</li>
 * </ul>
 *
 * @param language    two-letter language code (bg, de, es, fa, fi, fr, hu, it, nl, pt, ru, sv); only consulted
 *                    for the "DEFAULT" type
 * @param stopsetType "CUSTOM" or "DEFAULT", compared case-insensitively
 * @param stopsetPath path of the stop-word file; only consulted for the "CUSTOM" type
 * @return the resulting stop-word set
 * @throws Exception                     if the custom stop-word file cannot be found (the original
 *                                       {@link FileNotFoundException} is attached as the cause)
 * @throws UnsupportedOperationException if the "DEFAULT" type is requested for an unsupported language
 */
public static CharArraySet createStopset(String language, String stopsetType, String stopsetPath)
        throws Exception {

    CharArraySet stopset = CharArraySet.EMPTY_SET;

    if (stopsetType.equalsIgnoreCase("CUSTOM")) {

        stopset = new CharArraySet(0, true);

        // try-with-resources guarantees the scanner (and the underlying file handle) is released
        // even if reading or logging fails part-way through.
        try (Scanner sc = new Scanner(new File(stopsetPath))) {
            logger.debug("STOPLIST:");
            while (sc.hasNextLine()) {
                String stopword = sc.nextLine().trim();
                logger.debug("=> " + stopword);
                stopset.add(stopword);
            }
            logger.debug("");
        } catch (FileNotFoundException e) {
            // Keep the original exception as the cause instead of printing and discarding it.
            throw new Exception("FileNotFoundException when loading stopset", e);
        }

    } else if (stopsetType.equalsIgnoreCase("DEFAULT")) {

        switch (language) {
        case "bg":
            stopset = BulgarianAnalyzer.getDefaultStopSet();
            break;
        case "de":
            stopset = GermanAnalyzer.getDefaultStopSet();
            break;
        case "es":
            stopset = SpanishAnalyzer.getDefaultStopSet();
            break;
        case "fa":
            stopset = PersianAnalyzer.getDefaultStopSet();
            break;
        case "fi":
            stopset = FinnishAnalyzer.getDefaultStopSet();
            break;
        case "fr":
            stopset = FrenchAnalyzer.getDefaultStopSet();
            break;
        case "hu":
            stopset = HungarianAnalyzer.getDefaultStopSet();
            break;
        case "it":
            stopset = ItalianAnalyzer.getDefaultStopSet();
            break;
        case "nl":
            stopset = DutchAnalyzer.getDefaultStopSet();
            break;
        case "pt":
            stopset = PortugueseAnalyzer.getDefaultStopSet();
            break;
        case "ru":
            stopset = RussianAnalyzer.getDefaultStopSet();
            break;
        case "sv":
            stopset = SwedishAnalyzer.getDefaultStopSet();
            break;
        default:
            throw new UnsupportedOperationException("Language not supported yet");
        }

    }

    return stopset;
}

From source file:nl.b3p.viewer.stripes.CatalogSearchActionBean.java

License:Open Source License

/**
 * Builds an OR filter for the given query string on the given property.
 *
 * <p>Unless the processed query string is {@code null} or equals the default wildcard, the filter contains
 * one "property is equal to" clause for the whole query string plus one "property is like" clause for every
 * token that a {@link StandardAnalyzer} with the Dutch default stop set produces from it. If tokenizing
 * fails with an {@link IOException}, a single "like" clause for the whole query string is added instead.
 *
 * @param queryString  the raw query string; it is passed through {@code createQueryString} first
 * @param propertyName the property to filter on; it is passed through {@code createPropertyName} first
 * @return the OR filter wrapping all collected clauses (possibly none)
 */
private static Or createOrFilter(String queryString, String propertyName) {
    // Raw type kept on purpose: the element type expected by BinaryLogicOpType is not visible here.
    List orList = new ArrayList();
    queryString = createQueryString(queryString, false);
    if (queryString != null && !queryString.trim().equals(defaultWildCard)) {

        propertyName = createPropertyName(propertyName);

        PropertyIsEqualTo propertyIsEqualTo = FilterCreator.createPropertyIsEqualTo(queryString, propertyName);

        StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_45,
                DutchAnalyzer.getDefaultStopSet());

        orList.add(propertyIsEqualTo);
        try {
            TokenStream tokenStream = standardAnalyzer.tokenStream("", queryString);
            try {
                CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    String term = charTermAttribute.toString();
                    PropertyIsLike propertyIsLike = FilterCreator.createPropertyIsLike(term, propertyName);
                    orList.add(propertyIsLike);
                }
                // Complete the TokenStream consumer workflow before closing.
                tokenStream.end();
            } finally {
                // Always release the stream, also when reset()/incrementToken() fails.
                tokenStream.close();
            }
        } catch (IOException e) {
            // Tokenizing failed: fall back to a single "like" clause on the whole query string.
            PropertyIsLike propertyIsLike = FilterCreator.createPropertyIsLike(queryString, propertyName);
            orList.add(propertyIsLike);
        } finally {
            // The analyzer is created per call, so release its resources here.
            standardAnalyzer.close();
        }
    }

    Or or = new Or(new BinaryLogicOpType(orList));

    return or;
}

From source file:org.elasticsearch.analysis.common.DutchAnalyzerProvider.java

License:Apache License

DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new DutchAnalyzer(Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),
            Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);/*from  w  w  w .  jav a  2  s  .com*/
}

From source file:org.elasticsearch.index.analysis.DutchAnalyzerProvider.java

License:Apache License

@Inject
public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    analyzer = new DutchAnalyzer(version,
            Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
            Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
}