List of usage examples for the org.apache.lucene.analysis.de.GermanAnalyzer constructor GermanAnalyzer()
public GermanAnalyzer()
From source file:de.citec.lucene.CreateIndex.java
/**
 * Builds a fresh Lucene index from the hard-coded corpus file.
 *
 * <p>The analyzer is chosen from the configured {@code Language}. Every listed file is passed to
 * {@code indexDocs}; a file that fails with an {@link IOException} is logged and skipped so the
 * remaining files are still processed. The value of {@code counter} is printed at the end.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer is configured for the selected language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    // Fail fast instead of handing a null analyzer to the writer configuration.
    Analyzer analyzer;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else {
        throw new IllegalArgumentException("No analyzer configured for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): a 12000 MB buffer presumably needs a JVM heap sized to match - confirm.
    iwc.setRAMBufferSizeMB(12000);

    // The directory is managed together with the writer so it is closed as well.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                // Best effort: record the failing file and continue with the next one.
                Logger.getLogger(CreateIndex.class.getName())
                        .log(Level.SEVERE, "Failed to index " + f, ex);
            }
        });
    }
    System.out.println(counter);
}
From source file:de.citec.lucene.SearchIndex.java
/**
 * Opens the Lucene index stored at {@code pathToIndex} for searching.
 *
 * <p>An {@code EnglishAnalyzer} is assigned for {@code EN} and a {@code GermanAnalyzer} for
 * {@code DE}; for any other language the analyzer field is not assigned here.
 *
 * @param pathToIndex file-system path of the index directory
 * @param input_language language that selects the analyzer
 * @throws IOException if the index cannot be opened
 */
public SearchIndex(String pathToIndex, Language input_language) throws IOException {
    if (input_language.equals(EN)) {
        this.analyzer = new EnglishAnalyzer();
    } else if (input_language.equals(DE)) {
        this.analyzer = new GermanAnalyzer();
    }
    this.language = input_language;

    this.reader = DirectoryReader.open(
            FSDirectory.open(Paths.get(pathToIndex)));
    this.searcher = new IndexSearcher(this.reader);
}
From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java
public static void main(String[] args) throws IOException { Analyzer analyzer = null;//from w w w. j a va2s .c o m List<String> files = new ArrayList<>(); files.add("/Users/swalter/Downloads/german_sentences_reduced.txt"); String indexPath = "/Users/swalter/Index/GermanIndexReduced/"; Language language = Language.DE; Directory dir = FSDirectory.open(Paths.get(indexPath)); //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU"); //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index"; //Language language = Language.JA; //Directory dir = FSDirectory.open(Paths.get(indexPath)); if (language.equals(Language.DE)) analyzer = new GermanAnalyzer(); if (language.equals(Language.ES)) analyzer = new SpanishAnalyzer(); if (language.equals(Language.EN)) analyzer = new EnglishAnalyzer(); if (language.equals(Language.JA)) analyzer = new JapaneseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(12000); try (IndexWriter writer = new IndexWriter(dir, iwc)) { files.forEach(f -> { try { indexDocs(writer, Paths.get(f), language); } catch (IOException ex) { Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex); } }); } }
From source file:de.mirkosertic.desktopsearch.AnalyzerCache.java
License:Open Source License
/**
 * Creates the cache: sets up the standard analyzer and then offers one configured
 * language-specific analyzer per supported language to {@code registerIfEnabled}.
 *
 * @param aConfiguration configuration passed to every {@code registerIfEnabled} call
 */
public AnalyzerCache(Configuration aConfiguration) {
    this.standardAnalyzer = configure(new StandardAnalyzer());
    this.analyzerByLanguage = new HashMap<>();

    registerIfEnabled(SupportedLanguage.ar, aConfiguration, configure(new ArabicAnalyzer()));
    registerIfEnabled(SupportedLanguage.bg, aConfiguration, configure(new BulgarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.br, aConfiguration, configure(new BrazilianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ca, aConfiguration, configure(new CatalanAnalyzer()));
    registerIfEnabled(SupportedLanguage.ckb, aConfiguration, configure(new SoraniAnalyzer()));
    registerIfEnabled(SupportedLanguage.cz, aConfiguration, configure(new CzechAnalyzer()));
    registerIfEnabled(SupportedLanguage.da, aConfiguration, configure(new DanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.de, aConfiguration, configure(new GermanAnalyzer()));
    registerIfEnabled(SupportedLanguage.el, aConfiguration, configure(new GreekAnalyzer()));
    registerIfEnabled(SupportedLanguage.en, aConfiguration, configure(new EnglishAnalyzer()));
    registerIfEnabled(SupportedLanguage.es, aConfiguration, configure(new SpanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.eu, aConfiguration, configure(new BasqueAnalyzer()));
    registerIfEnabled(SupportedLanguage.fa, aConfiguration, configure(new PersianAnalyzer()));
    registerIfEnabled(SupportedLanguage.fi, aConfiguration, configure(new FinnishAnalyzer()));
    registerIfEnabled(SupportedLanguage.fr, aConfiguration, configure(new FrenchAnalyzer()));
    registerIfEnabled(SupportedLanguage.ga, aConfiguration, configure(new IrishAnalyzer()));
    registerIfEnabled(SupportedLanguage.gl, aConfiguration, configure(new GalicianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hi, aConfiguration, configure(new HindiAnalyzer()));
    registerIfEnabled(SupportedLanguage.hu, aConfiguration, configure(new HungarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hy, aConfiguration, configure(new ArmenianAnalyzer()));
    registerIfEnabled(SupportedLanguage.id, aConfiguration, configure(new IndonesianAnalyzer()));
    registerIfEnabled(SupportedLanguage.it, aConfiguration, configure(new ItalianAnalyzer()));
    registerIfEnabled(SupportedLanguage.lv, aConfiguration, configure(new LatvianAnalyzer()));
    registerIfEnabled(SupportedLanguage.nl, aConfiguration, configure(new DutchAnalyzer()));
    registerIfEnabled(SupportedLanguage.no, aConfiguration, configure(new NorwegianAnalyzer()));
    registerIfEnabled(SupportedLanguage.pt, aConfiguration, configure(new PortugueseAnalyzer()));
    registerIfEnabled(SupportedLanguage.ro, aConfiguration, configure(new RomanianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ru, aConfiguration, configure(new RussianAnalyzer()));
    registerIfEnabled(SupportedLanguage.sv, aConfiguration, configure(new SwedishAnalyzer()));
    registerIfEnabled(SupportedLanguage.th, aConfiguration, configure(new ThaiAnalyzer()));
    registerIfEnabled(SupportedLanguage.tr, aConfiguration, configure(new TurkishAnalyzer()));
}
From source file:de.mirkosertic.desktopsearch.QueryParserTest.java
License:Open Source License
@Test public void testParse() throws IOException { GermanAnalyzer theAnalyzer = new GermanAnalyzer(); theAnalyzer.setVersion(IndexFields.LUCENE_VERSION); QueryParser theParser = new QueryParser(theAnalyzer); Query theQuery = theParser.parse("der a +b -c dudel* ~nudel -~yahoo -*wildcard hello", "field"); assertEquals(/* ww w . j ava 2 s. c o m*/ "spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 0, true)^61.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 0, false)^60.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 1, false)^59.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 2, false)^58.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 3, false)^57.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 4, false)^56.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 5, false)^55.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 6, false)^54.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 7, false)^53.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 8, false)^52.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 9, false)^51.0 +field:a +field:b +field:dudel* +field:~nudel~2 +field:hello -field:c -field:~yahoo~2 
-field:*wildcard", theQuery.toString()); }
From source file:io.crate.lucene.match.MatchQueryBuilderTest.java
License:Apache License
@Test public void testCrossFieldMatchType() throws Exception { Analyzer analyzer = new GermanAnalyzer(); MapperService.SmartNameFieldMappers smartNameFieldMappers = mock(MapperService.SmartNameFieldMappers.class); when(smartNameFieldMappers.hasMapper()).thenReturn(true); FieldMapper fieldMapper = mock(FieldMapper.class, Answers.RETURNS_MOCKS.get()); when(smartNameFieldMappers.mapper()).thenReturn(fieldMapper); when(fieldMapper.searchAnalyzer()).thenReturn(analyzer); MapperService mapperService = mock(MapperService.class); when(mapperService.smartName(anyString())).thenReturn(smartNameFieldMappers); when(mapperService.searchAnalyzer()).thenReturn(analyzer); MatchQueryBuilder builder = new io.crate.lucene.match.MultiMatchQueryBuilder(mapperService, cache, new BytesRef("cross_fields"), Collections.emptyMap()); Map<String, Object> fields = MapBuilder.<String, Object>newMapBuilder().put("col1", null).put("col2", null) .map();//from ww w. ja va 2 s . co m Query query = builder.query(fields, new BytesRef("foo bar")); assertThat(query, instanceOf(BooleanQuery.class)); Query innerQuery = ((BooleanQuery) query).clauses().get(0).getQuery(); assertThat(innerQuery, instanceOf(BlendedTermQuery.class)); }
From source file:io.crate.lucene.match.MatchQueryBuilderTest.java
License:Apache License
/**
 * Creates a mocked {@code MapperService} whose {@code searchAnalyzer()} returns a
 * {@code GermanAnalyzer}.
 *
 * @return the stubbed mapper service
 */
private MapperService mockMapperService() {
    MapperService service = mock(MapperService.class);
    Analyzer germanAnalyzer = new GermanAnalyzer();
    when(service.searchAnalyzer()).thenReturn(germanAnalyzer);
    return service;
}
From source file:lucee.runtime.search.lucene2.SearchUtil.java
License:Open Source License
public static Analyzer getAnalyzer(String language) throws SearchException { if (language == null) language = "english"; else// w ww. j a va2 s . co m language = language.toLowerCase().trim(); language = lucee.runtime.search.SearchUtil.translateLanguage(language); Analyzer analyzer = analyzers.get(language); if (analyzer != null) return analyzer; if (language.equals("english")) analyzer = new StandardAnalyzer(); else if (language.equals("german")) analyzer = new GermanAnalyzer(); else if (language.equals("russian")) analyzer = new RussianAnalyzer(); else if (language.equals("dutch")) analyzer = new DutchAnalyzer(); else if (language.equals("french")) analyzer = new FrenchAnalyzer(); else if (language.equals("norwegian")) analyzer = new NorwegianAnalyzer(); else if (language.equals("portuguese")) analyzer = new PortugueseAnalyzer(); else if (language.equals("spanish")) analyzer = new SpanishAnalyzer(); else if (language.equals("brazilian")) analyzer = new BrazilianAnalyzer(); else if (language.equals("chinese")) analyzer = new ChineseAnalyzer(); else if (language.startsWith("czech")) analyzer = new CzechAnalyzer(); else if (language.equals("greek")) analyzer = new GreekAnalyzer(); else if (language.equals("thai")) analyzer = new ThaiAnalyzer(); else if (language.equals("japanese")) analyzer = new CJKAnalyzer(); else if (language.equals("korean")) analyzer = new CJKAnalyzer(); else if (language.equals("italian")) analyzer = new ItalianAnalyzer(); else if (language.equals("danish")) analyzer = new DanishAnalyzer(); else if (language.equals("norwegian")) analyzer = new NorwegianAnalyzer(); else if (language.equals("finnish")) analyzer = new SnowballAnalyzer("Finnish"); else if (language.equals("swedish")) analyzer = new SnowballAnalyzer("Swedish"); else { String clazzName = "org.apache.lucene.analysis.el." 
+ StringUtil.ucFirst(language.trim().toLowerCase()) + "Analyzer;"; Object o = ClassUtil.loadInstance(clazzName, (Object) null); if (o == null) { clazzName = "lucee.runtime.search.lucene2.analyzer." + StringUtil.ucFirst(language.trim().toLowerCase()) + "Analyzer"; o = ClassUtil.loadInstance(clazzName, (Object) null);//Class.orName(clazzName).newInstance(); } if (o instanceof Analyzer) analyzer = (Analyzer) o; else if (o == null) throw new SearchException("can't create Language Analyzer for Lanuage " + language + ", make Analyzer [" + clazzName + "] available"); else throw new SearchException("can't create Language Analyzer for Lanuage " + language + ", Analyzer [" + clazzName + "] is of invalid type"); } analyzers.put(language, analyzer); return analyzer; }