Example usage for org.apache.lucene.analysis.de GermanAnalyzer GermanAnalyzer

List of usage examples for org.apache.lucene.analysis.de GermanAnalyzer GermanAnalyzer

Introduction

On this page you can find example usages of the no-argument constructor GermanAnalyzer() of the class org.apache.lucene.analysis.de.GermanAnalyzer.

Prototype

public GermanAnalyzer() 

Source Link

Document

Builds an analyzer with the default stop words, as returned by getDefaultStopSet().

Usage

From source file:de.citec.lucene.CreateIndex.java

/**
 * Builds a fresh Lucene index from the hard-coded corpus file.
 *
 * <p>An analyzer is chosen for the hard-coded {@code language}, every listed file is handed to
 * {@code indexDocs}, and the value of {@code counter} is printed after the writer has been closed.
 * A failure while indexing a single file is logged and does not abort the remaining files.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    // The branches are mutually exclusive, so stop at the first match.
    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): 12000 MB is a very large RAM buffer -- confirm the JVM heap is sized for it.
    iwc.setRAMBufferSizeMB(12000);

    // Closing the IndexWriter does not close the Directory it writes to, so the directory is
    // managed as a resource of its own; resources are closed in reverse order (writer first).
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, "Failed to index " + f, ex);
            }
        });
    }
    System.out.println(counter);
}

From source file:de.citec.lucene.SearchIndex.java

public SearchIndex(String pathToIndex, Language input_language) throws IOException {
    if (input_language.equals(DE))
        this.analyzer = new GermanAnalyzer();
    if (input_language.equals(EN))
        this.analyzer = new EnglishAnalyzer();
    this.language = input_language;
    this.reader = DirectoryReader.open(FSDirectory.open(Paths.get(pathToIndex)));
    this.searcher = new IndexSearcher(reader);

}

From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java

/**
 * Builds a fresh Lucene index from the hard-coded sentence file.
 *
 * <p>An analyzer is chosen for the hard-coded {@code language} and every listed file is handed to
 * {@code indexDocs} together with that language. A failure while indexing a single file is logged
 * and does not abort the remaining files.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Downloads/german_sentences_reduced.txt");
    String indexPath = "/Users/swalter/Index/GermanIndexReduced/";
    Language language = Language.DE;

    // Alternative configuration (Japanese corpus), kept for reference:
    //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU");
    //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index";
    //Language language = Language.JA;

    // The branches are mutually exclusive, so stop at the first match.
    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else if (language.equals(Language.JA)) {
        analyzer = new JapaneseAnalyzer();
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): 12000 MB is a very large RAM buffer -- confirm the JVM heap is sized for it.
    iwc.setRAMBufferSizeMB(12000);

    // Closing the IndexWriter does not close the Directory it writes to, so the directory is
    // managed as a resource of its own; resources are closed in reverse order (writer first).
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f), language);
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, "Failed to index " + f, ex);
            }
        });
    }
}

From source file:de.mirkosertic.desktopsearch.AnalyzerCache.java

License:Open Source License

/**
 * Sets up the fallback {@code StandardAnalyzer} and offers one language-specific analyzer per
 * {@code SupportedLanguage} constant to {@code registerIfEnabled}.
 *
 * <p>Every analyzer is passed through {@code configure} before it is used. Whether a language is
 * actually registered is decided by {@code registerIfEnabled} (presumably based on the given
 * configuration -- see that method).
 *
 * @param aConfiguration the configuration forwarded to every {@code registerIfEnabled} call
 */
public AnalyzerCache(Configuration aConfiguration) {
    this.standardAnalyzer = configure(new StandardAnalyzer());
    this.analyzerByLanguage = new HashMap<>();

    // One registration per supported language, in the order of the language codes.
    registerIfEnabled(SupportedLanguage.ar, aConfiguration,
            configure(new ArabicAnalyzer()));
    registerIfEnabled(SupportedLanguage.bg, aConfiguration,
            configure(new BulgarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.br, aConfiguration,
            configure(new BrazilianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ca, aConfiguration,
            configure(new CatalanAnalyzer()));
    registerIfEnabled(SupportedLanguage.ckb, aConfiguration,
            configure(new SoraniAnalyzer()));
    registerIfEnabled(SupportedLanguage.cz, aConfiguration,
            configure(new CzechAnalyzer()));
    registerIfEnabled(SupportedLanguage.da, aConfiguration,
            configure(new DanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.de, aConfiguration,
            configure(new GermanAnalyzer()));
    registerIfEnabled(SupportedLanguage.el, aConfiguration,
            configure(new GreekAnalyzer()));
    registerIfEnabled(SupportedLanguage.en, aConfiguration,
            configure(new EnglishAnalyzer()));
    registerIfEnabled(SupportedLanguage.es, aConfiguration,
            configure(new SpanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.eu, aConfiguration,
            configure(new BasqueAnalyzer()));
    registerIfEnabled(SupportedLanguage.fa, aConfiguration,
            configure(new PersianAnalyzer()));
    registerIfEnabled(SupportedLanguage.fi, aConfiguration,
            configure(new FinnishAnalyzer()));
    registerIfEnabled(SupportedLanguage.fr, aConfiguration,
            configure(new FrenchAnalyzer()));
    registerIfEnabled(SupportedLanguage.ga, aConfiguration,
            configure(new IrishAnalyzer()));
    registerIfEnabled(SupportedLanguage.gl, aConfiguration,
            configure(new GalicianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hi, aConfiguration,
            configure(new HindiAnalyzer()));
    registerIfEnabled(SupportedLanguage.hu, aConfiguration,
            configure(new HungarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hy, aConfiguration,
            configure(new ArmenianAnalyzer()));
    registerIfEnabled(SupportedLanguage.id, aConfiguration,
            configure(new IndonesianAnalyzer()));
    registerIfEnabled(SupportedLanguage.it, aConfiguration,
            configure(new ItalianAnalyzer()));
    registerIfEnabled(SupportedLanguage.lv, aConfiguration,
            configure(new LatvianAnalyzer()));
    registerIfEnabled(SupportedLanguage.nl, aConfiguration,
            configure(new DutchAnalyzer()));
    registerIfEnabled(SupportedLanguage.no, aConfiguration,
            configure(new NorwegianAnalyzer()));
    registerIfEnabled(SupportedLanguage.pt, aConfiguration,
            configure(new PortugueseAnalyzer()));
    registerIfEnabled(SupportedLanguage.ro, aConfiguration,
            configure(new RomanianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ru, aConfiguration,
            configure(new RussianAnalyzer()));
    registerIfEnabled(SupportedLanguage.sv, aConfiguration,
            configure(new SwedishAnalyzer()));
    registerIfEnabled(SupportedLanguage.th, aConfiguration,
            configure(new ThaiAnalyzer()));
    registerIfEnabled(SupportedLanguage.tr, aConfiguration,
            configure(new TurkishAnalyzer()));
}

From source file:de.mirkosertic.desktopsearch.QueryParserTest.java

License:Open Source License

/**
 * Parses a mixed query (plain terms, a required term, a negated term, a wildcard term, fuzzy
 * terms marked with '~', and negated fuzzy/wildcard terms) with a {@code GermanAnalyzer} and
 * compares the string form of the resulting query against the expected structure.
 */
@Test
public void testParse() throws IOException {
    GermanAnalyzer theAnalyzer = new GermanAnalyzer();
    theAnalyzer.setVersion(IndexFields.LUCENE_VERSION);

    QueryParser theParser = new QueryParser(theAnalyzer);
    // The leading "der" does not show up in the expected query below -- presumably it is dropped
    // as a German stop word by the analyzer (TODO confirm).
    Query theQuery = theParser.parse("der a +b -c dudel* ~nudel -~yahoo -*wildcard hello", "field");

    // Expected string: eleven spanNear clauses over the positive terms (one ordered clause with
    // slop 0, then unordered clauses with slop 0 through 9, boosts descending from 61.0 to 51.0),
    // followed by the required (+) and prohibited (-) per-term clauses.
    assertEquals(
            "spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 0, true)^61.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 0, false)^60.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 1, false)^59.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 2, false)^58.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 3, false)^57.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 4, false)^56.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 5, false)^55.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 6, false)^54.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 7, false)^53.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 8, false)^52.0 spanNear([field:a, field:b, SpanMultiTermQueryWrapper(field:dudel*), SpanMultiTermQueryWrapper(field:~nudel~2), field:hello], 9, false)^51.0 +field:a +field:b +field:dudel* +field:~nudel~2 +field:hello -field:c -field:~yahoo~2 -field:*wildcard",
            theQuery.toString());
}

From source file:io.crate.lucene.match.MatchQueryBuilderTest.java

License:Apache License

/**
 * Verifies that a "cross_fields" multi-match over two columns yields a {@code BooleanQuery}
 * whose first clause wraps a {@code BlendedTermQuery}.
 */
@Test
public void testCrossFieldMatchType() throws Exception {
    Analyzer germanAnalyzer = new GermanAnalyzer();

    // Smart-name lookup that always reports a mapper and hands back the mocked field mapper.
    MapperService.SmartNameFieldMappers smartMappers = mock(MapperService.SmartNameFieldMappers.class);
    when(smartMappers.hasMapper()).thenReturn(true);
    FieldMapper mockedFieldMapper = mock(FieldMapper.class, Answers.RETURNS_MOCKS.get());
    when(smartMappers.mapper()).thenReturn(mockedFieldMapper);
    when(mockedFieldMapper.searchAnalyzer()).thenReturn(germanAnalyzer);

    // Mapper service that resolves every field name to the smart mappers above.
    MapperService service = mock(MapperService.class);
    when(service.smartName(anyString())).thenReturn(smartMappers);
    when(service.searchAnalyzer()).thenReturn(germanAnalyzer);

    MatchQueryBuilder queryBuilder = new io.crate.lucene.match.MultiMatchQueryBuilder(service, cache,
            new BytesRef("cross_fields"), Collections.emptyMap());

    // Two columns, both mapped to null values.
    MapBuilder<String, Object> columns = MapBuilder.newMapBuilder();
    columns.put("col1", null);
    columns.put("col2", null);
    Map<String, Object> fields = columns.map();

    Query result = queryBuilder.query(fields, new BytesRef("foo bar"));
    assertThat(result, instanceOf(BooleanQuery.class));

    BooleanQuery booleanResult = (BooleanQuery) result;
    Query firstClauseQuery = booleanResult.clauses().get(0).getQuery();
    assertThat(firstClauseQuery, instanceOf(BlendedTermQuery.class));
}

From source file:io.crate.lucene.match.MatchQueryBuilderTest.java

License:Apache License

/**
 * Creates a mocked {@code MapperService} whose search analyzer is a {@code GermanAnalyzer}.
 *
 * @return the stubbed mapper service mock
 */
private MapperService mockMapperService() {
    MapperService stubbedService = mock(MapperService.class);
    Analyzer germanAnalyzer = new GermanAnalyzer();
    when(stubbedService.searchAnalyzer()).thenReturn(germanAnalyzer);
    return stubbedService;
}

From source file:lucee.runtime.search.lucene2.SearchUtil.java

License:Open Source License

/**
 * Returns the Lucene analyzer for the given language, creating and caching it on first use.
 *
 * <p>The language name is lower-cased, trimmed and passed through
 * {@code lucee.runtime.search.SearchUtil.translateLanguage}. Known languages map to a fixed
 * analyzer class; any other name is resolved reflectively, first as a Lucene class and then as a
 * class in {@code lucee.runtime.search.lucene2.analyzer}.
 *
 * @param language language name; {@code null} is treated as {@code "english"}
 * @return the cached or newly created analyzer, never {@code null}
 * @throws SearchException if no analyzer class can be loaded for the language, or the loaded
 *         class is not an {@code Analyzer}
 */
public static Analyzer getAnalyzer(String language) throws SearchException {
    if (language == null)
        language = "english";
    else
        // Locale.ROOT keeps the key stable regardless of the default locale (the no-arg
        // toLowerCase() maps 'I' to a dotless 'i' under e.g. a Turkish locale, so "ITALIAN"
        // would no longer match "italian").
        language = language.toLowerCase(java.util.Locale.ROOT).trim();
    language = lucee.runtime.search.SearchUtil.translateLanguage(language);

    Analyzer analyzer = analyzers.get(language);
    if (analyzer != null)
        return analyzer;

    if (language.equals("english"))
        analyzer = new StandardAnalyzer();
    else if (language.equals("german"))
        analyzer = new GermanAnalyzer();
    else if (language.equals("russian"))
        analyzer = new RussianAnalyzer();
    else if (language.equals("dutch"))
        analyzer = new DutchAnalyzer();
    else if (language.equals("french"))
        analyzer = new FrenchAnalyzer();
    else if (language.equals("norwegian"))
        analyzer = new NorwegianAnalyzer();
    else if (language.equals("portuguese"))
        analyzer = new PortugueseAnalyzer();
    else if (language.equals("spanish"))
        analyzer = new SpanishAnalyzer();
    else if (language.equals("brazilian"))
        analyzer = new BrazilianAnalyzer();
    else if (language.equals("chinese"))
        analyzer = new ChineseAnalyzer();
    else if (language.startsWith("czech"))
        analyzer = new CzechAnalyzer();
    else if (language.equals("greek"))
        analyzer = new GreekAnalyzer();
    else if (language.equals("thai"))
        analyzer = new ThaiAnalyzer();
    else if (language.equals("japanese") || language.equals("korean"))
        analyzer = new CJKAnalyzer();
    else if (language.equals("italian"))
        analyzer = new ItalianAnalyzer();
    else if (language.equals("danish"))
        analyzer = new DanishAnalyzer();
    else if (language.equals("finnish"))
        analyzer = new SnowballAnalyzer("Finnish");
    else if (language.equals("swedish"))
        analyzer = new SnowballAnalyzer("Swedish");
    else {
        // Unknown language: derive "<Language>Analyzer" and try to load it reflectively.
        String simpleName = StringUtil.ucFirst(language.trim().toLowerCase(java.util.Locale.ROOT)) + "Analyzer";

        // The class name must not carry a trailing ';' -- with it this lookup could never succeed.
        // NOTE(review): the "el" package looks like a leftover from the Greek analyzer; confirm
        // which Lucene package(s) this fallback is really meant to probe.
        String clazzName = "org.apache.lucene.analysis.el." + simpleName;
        Object o = ClassUtil.loadInstance(clazzName, (Object) null);
        if (o == null) {
            clazzName = "lucee.runtime.search.lucene2.analyzer." + simpleName;
            o = ClassUtil.loadInstance(clazzName, (Object) null);
        }
        if (o instanceof Analyzer)
            analyzer = (Analyzer) o;
        else if (o == null)
            throw new SearchException("can't create Language Analyzer for Lanuage " + language
                    + ", make Analyzer [" + clazzName + "] available");
        else
            throw new SearchException("can't create Language Analyzer for Lanuage " + language + ", Analyzer ["
                    + clazzName + "] is of invalid type");
    }
    analyzers.put(language, analyzer);
    return analyzer;
}