List of usage examples for org.apache.lucene.analysis.fi.FinnishAnalyzer, constructor FinnishAnalyzer()
public FinnishAnalyzer()
From source file: de.mirkosertic.desktopsearch.AnalyzerCache.java
License: Open Source License
/**
 * Creates the analyzer cache.
 *
 * <p>A configured {@code StandardAnalyzer} is stored as the standard analyzer, the
 * per-language map is created empty, and then {@code registerIfEnabled} is invoked once
 * per supported language with a freshly configured language-specific analyzer.
 *
 * <p>Note that every language analyzer is instantiated and passed through
 * {@code configure} before {@code registerIfEnabled} is called, i.e. regardless of
 * whether the language ends up being registered.
 *
 * @param aConfiguration the configuration handed to each {@code registerIfEnabled} call
 *                       (presumably consulted to decide whether a language is enabled)
 */
public AnalyzerCache(Configuration aConfiguration) {
    this.standardAnalyzer = configure(new StandardAnalyzer());
    this.analyzerByLanguage = new HashMap<>();

    // One registration per supported language, in the original order.
    registerIfEnabled(SupportedLanguage.ar, aConfiguration, configure(new ArabicAnalyzer()));
    registerIfEnabled(SupportedLanguage.bg, aConfiguration, configure(new BulgarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.br, aConfiguration, configure(new BrazilianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ca, aConfiguration, configure(new CatalanAnalyzer()));
    registerIfEnabled(SupportedLanguage.ckb, aConfiguration, configure(new SoraniAnalyzer()));
    registerIfEnabled(SupportedLanguage.cz, aConfiguration, configure(new CzechAnalyzer()));
    registerIfEnabled(SupportedLanguage.da, aConfiguration, configure(new DanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.de, aConfiguration, configure(new GermanAnalyzer()));
    registerIfEnabled(SupportedLanguage.el, aConfiguration, configure(new GreekAnalyzer()));
    registerIfEnabled(SupportedLanguage.en, aConfiguration, configure(new EnglishAnalyzer()));
    registerIfEnabled(SupportedLanguage.es, aConfiguration, configure(new SpanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.eu, aConfiguration, configure(new BasqueAnalyzer()));
    registerIfEnabled(SupportedLanguage.fa, aConfiguration, configure(new PersianAnalyzer()));
    registerIfEnabled(SupportedLanguage.fi, aConfiguration, configure(new FinnishAnalyzer()));
    registerIfEnabled(SupportedLanguage.fr, aConfiguration, configure(new FrenchAnalyzer()));
    registerIfEnabled(SupportedLanguage.ga, aConfiguration, configure(new IrishAnalyzer()));
    registerIfEnabled(SupportedLanguage.gl, aConfiguration, configure(new GalicianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hi, aConfiguration, configure(new HindiAnalyzer()));
    registerIfEnabled(SupportedLanguage.hu, aConfiguration, configure(new HungarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hy, aConfiguration, configure(new ArmenianAnalyzer()));
    registerIfEnabled(SupportedLanguage.id, aConfiguration, configure(new IndonesianAnalyzer()));
    registerIfEnabled(SupportedLanguage.it, aConfiguration, configure(new ItalianAnalyzer()));
    registerIfEnabled(SupportedLanguage.lv, aConfiguration, configure(new LatvianAnalyzer()));
    registerIfEnabled(SupportedLanguage.nl, aConfiguration, configure(new DutchAnalyzer()));
    registerIfEnabled(SupportedLanguage.no, aConfiguration, configure(new NorwegianAnalyzer()));
    registerIfEnabled(SupportedLanguage.pt, aConfiguration, configure(new PortugueseAnalyzer()));
    registerIfEnabled(SupportedLanguage.ro, aConfiguration, configure(new RomanianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ru, aConfiguration, configure(new RussianAnalyzer()));
    registerIfEnabled(SupportedLanguage.sv, aConfiguration, configure(new SwedishAnalyzer()));
    registerIfEnabled(SupportedLanguage.th, aConfiguration, configure(new ThaiAnalyzer()));
    registerIfEnabled(SupportedLanguage.tr, aConfiguration, configure(new TurkishAnalyzer()));
}
From source file: practica2_1.Practica2_1.java
public static void main(String[] args) throws IOException, TikaException { Analyzer[] analizadores = { new WhitespaceAnalyzer(), new SimpleAnalyzer(), new StandardAnalyzer(), new EnglishAnalyzer(), new FrenchAnalyzer(), new FinnishAnalyzer(), CustomAnalyzer.builder(Paths.get("")).withTokenizer(StandardTokenizerFactory.class) .addTokenFilter(LowerCaseFilterFactory.class) .addTokenFilter(StopFilterFactory.class, "words", "C:\\Users\\Javi\\Desktop\\RI\\practica2\\stopwords.txt" /*, "ignoreCase", "false", "words", "stopwords.txt", "format", "wordset"*/) .build() };//from www .j ava 2s. c o m File f = new File(args[0]); Tika tika = new Tika(); if (f.exists()) { File[] ficheros = f.listFiles(); for (int i = 0; i < ficheros.length; i++) { System.out.println(ficheros[i].getAbsolutePath()); } for (int i = 0; i < ficheros.length; i++) { File f2 = new File(ficheros[i].getAbsolutePath()); String text = tika.parseToString(f2); String language = identifyLanguage(text); List<String> result = new ArrayList<String>(); String name = ficheros[i].getAbsolutePath(); if (name.indexOf(".java") != -1) { result = tokenizeString(analizadores[6], text); process(result, ficheros[i].getAbsolutePath() + "_codeAnalyzer.txt"); } else if (name.indexOf(".java") == -1) for (int j = 0; j < analizadores.length - 1; j++) { List<String> result2 = new ArrayList<String>(); result2 = tokenizeString(analizadores[i], text); if (j == 0) process(result2, name + "_WhitespaceAnalyzer"); else if (j == 1) process(result2, name + "_SimpleAnalyzer"); else if (j == 2) process(result2, name + "_StandardAnalyzer"); else if (j == 3 && language.equals("en")) process(result2, name + "_englishAnalyzer"); else if (j == 4 && language.equals("fr")) process(result2, name + "_frenchAnalyzer"); else if (j == 5 && language.equals("fi")) process(result2, name + "_finnishAnalyzer"); } } } }