Example usage for org.apache.lucene.analysis.fi FinnishAnalyzer FinnishAnalyzer

List of usage examples for org.apache.lucene.analysis.fi FinnishAnalyzer FinnishAnalyzer

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.fi.FinnishAnalyzer constructor FinnishAnalyzer().

Prototype

public FinnishAnalyzer() 

Source Link

Document

Builds an analyzer with the default stop words: DEFAULT_STOPWORD_FILE.

Usage

From source file:de.mirkosertic.desktopsearch.AnalyzerCache.java

License:Open Source License

public AnalyzerCache(Configuration aConfiguration) {
    standardAnalyzer = configure(new StandardAnalyzer());
    analyzerByLanguage = new HashMap<>();

    registerIfEnabled(SupportedLanguage.ar, aConfiguration, configure(new ArabicAnalyzer()));
    registerIfEnabled(SupportedLanguage.bg, aConfiguration, configure(new BulgarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.br, aConfiguration, configure(new BrazilianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ca, aConfiguration, configure(new CatalanAnalyzer()));
    registerIfEnabled(SupportedLanguage.ckb, aConfiguration, configure(new SoraniAnalyzer()));
    registerIfEnabled(SupportedLanguage.cz, aConfiguration, configure(new CzechAnalyzer()));
    registerIfEnabled(SupportedLanguage.da, aConfiguration, configure(new DanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.de, aConfiguration, configure(new GermanAnalyzer()));
    registerIfEnabled(SupportedLanguage.el, aConfiguration, configure(new GreekAnalyzer()));
    registerIfEnabled(SupportedLanguage.en, aConfiguration, configure(new EnglishAnalyzer()));
    registerIfEnabled(SupportedLanguage.es, aConfiguration, configure(new SpanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.eu, aConfiguration, configure(new BasqueAnalyzer()));
    registerIfEnabled(SupportedLanguage.fa, aConfiguration, configure(new PersianAnalyzer()));
    registerIfEnabled(SupportedLanguage.fi, aConfiguration, configure(new FinnishAnalyzer()));
    registerIfEnabled(SupportedLanguage.fr, aConfiguration, configure(new FrenchAnalyzer()));
    registerIfEnabled(SupportedLanguage.ga, aConfiguration, configure(new IrishAnalyzer()));
    registerIfEnabled(SupportedLanguage.gl, aConfiguration, configure(new GalicianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hi, aConfiguration, configure(new HindiAnalyzer()));
    registerIfEnabled(SupportedLanguage.hu, aConfiguration, configure(new HungarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hy, aConfiguration, configure(new ArmenianAnalyzer()));
    registerIfEnabled(SupportedLanguage.id, aConfiguration, configure(new IndonesianAnalyzer()));
    registerIfEnabled(SupportedLanguage.it, aConfiguration, configure(new ItalianAnalyzer()));
    registerIfEnabled(SupportedLanguage.lv, aConfiguration, configure(new LatvianAnalyzer()));
    registerIfEnabled(SupportedLanguage.nl, aConfiguration, configure(new DutchAnalyzer()));
    registerIfEnabled(SupportedLanguage.no, aConfiguration, configure(new NorwegianAnalyzer()));
    registerIfEnabled(SupportedLanguage.pt, aConfiguration, configure(new PortugueseAnalyzer()));
    registerIfEnabled(SupportedLanguage.ro, aConfiguration, configure(new RomanianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ru, aConfiguration, configure(new RussianAnalyzer()));
    registerIfEnabled(SupportedLanguage.sv, aConfiguration, configure(new SwedishAnalyzer()));
    registerIfEnabled(SupportedLanguage.th, aConfiguration, configure(new ThaiAnalyzer()));
    registerIfEnabled(SupportedLanguage.tr, aConfiguration, configure(new TurkishAnalyzer()));
}

From source file:practica2_1.Practica2_1.java

/**
 * Tokenizes every file of a directory with a set of Lucene analyzers and hands the
 * resulting tokens to {@code process}.
 *
 * <p>Files whose absolute path contains {@code ".java"} are tokenized once with a
 * custom analyzer (standard tokenizer, lower-casing, stop-word filter). Every other
 * file is tokenized with the whitespace, simple and standard analyzers, and
 * additionally with the English, French or Finnish analyzer when
 * {@code identifyLanguage} reports {@code "en"}, {@code "fr"} or {@code "fi"}.
 *
 * @param args {@code args[0]} is the directory whose files are analyzed
 * @throws IOException   if the custom analyzer cannot be built or a file cannot be read
 * @throws TikaException if Tika fails to extract text from a file
 */
public static void main(String[] args) throws IOException, TikaException {
    if (args.length == 0) {
        System.err.println("Usage: Practica2_1 <directory>");
        return;
    }

    // General analyzers; sufijos[k] and idiomas[k] describe analizadores[k].
    Analyzer[] analizadores = { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StandardAnalyzer(),
            new EnglishAnalyzer(), new FrenchAnalyzer(), new FinnishAnalyzer() };
    String[] sufijos = { "_WhitespaceAnalyzer", "_SimpleAnalyzer", "_StandardAnalyzer", "_englishAnalyzer",
            "_frenchAnalyzer", "_finnishAnalyzer" };
    // Language code an analyzer is restricted to; null means "always applies".
    String[] idiomas = { null, null, null, "en", "fr", "fi" };

    // Analyzer used for source-code files.
    // NOTE(review): the stop-word file is a hard-coded, machine-specific absolute path.
    Analyzer codeAnalyzer = CustomAnalyzer.builder(Paths.get("")).withTokenizer(StandardTokenizerFactory.class)
            .addTokenFilter(LowerCaseFilterFactory.class)
            .addTokenFilter(StopFilterFactory.class, "words",
                    "C:\\Users\\Javi\\Desktop\\RI\\practica2\\stopwords.txt")
            .build();

    File f = new File(args[0]);
    Tika tika = new Tika();

    // listFiles() yields null when the path is missing, not a directory or unreadable.
    File[] ficheros = f.listFiles();
    if (ficheros == null) {
        System.err.println("Not a readable directory: " + f.getAbsolutePath());
        return;
    }

    for (File fichero : ficheros) {
        System.out.println(fichero.getAbsolutePath());
    }

    for (File fichero : ficheros) {
        String name = fichero.getAbsolutePath();
        String text = tika.parseToString(fichero);
        String language = identifyLanguage(text);

        if (name.contains(".java")) {
            process(tokenizeString(codeAnalyzer, text), name + "_codeAnalyzer.txt");
            continue;
        }

        for (int j = 0; j < analizadores.length; j++) {
            // Skip language-specific analyzers that do not match the detected language,
            // so no text is tokenized only to be thrown away.
            if (idiomas[j] != null && !idiomas[j].equals(language)) {
                continue;
            }
            // Index by the analyzer counter j (the original used the file index here).
            process(tokenizeString(analizadores[j], text), name + sufijos[j]);
        }
    }
}