de.mirkosertic.desktopsearch.AnalyzerCache.java Source code

Java tutorial

Introduction

Here is the source code for de.mirkosertic.desktopsearch.AnalyzerCache.java

Source

/**
 * FreeDesktopSearch - A Search Engine for your Desktop
 * Copyright (C) 2013 Mirko Sertic
 *
 * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
package de.mirkosertic.desktopsearch;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

class AnalyzerCache {

    private static final String FIELD_PREFIX = IndexFields.CONTENT + "_";

    private final Map<SupportedLanguage, Analyzer> analyzerByLanguage;
    private final Analyzer standardAnalyzer;

    private static Analyzer configure(Analyzer aAnalyzer) {
        aAnalyzer.setVersion(IndexFields.LUCENE_VERSION);
        return aAnalyzer;
    }

    public AnalyzerCache(Configuration aConfiguration) {
        standardAnalyzer = configure(new StandardAnalyzer());
        analyzerByLanguage = new HashMap<>();

        registerIfEnabled(SupportedLanguage.ar, aConfiguration, configure(new ArabicAnalyzer()));
        registerIfEnabled(SupportedLanguage.bg, aConfiguration, configure(new BulgarianAnalyzer()));
        registerIfEnabled(SupportedLanguage.br, aConfiguration, configure(new BrazilianAnalyzer()));
        registerIfEnabled(SupportedLanguage.ca, aConfiguration, configure(new CatalanAnalyzer()));
        registerIfEnabled(SupportedLanguage.ckb, aConfiguration, configure(new SoraniAnalyzer()));
        registerIfEnabled(SupportedLanguage.cz, aConfiguration, configure(new CzechAnalyzer()));
        registerIfEnabled(SupportedLanguage.da, aConfiguration, configure(new DanishAnalyzer()));
        registerIfEnabled(SupportedLanguage.de, aConfiguration, configure(new GermanAnalyzer()));
        registerIfEnabled(SupportedLanguage.el, aConfiguration, configure(new GreekAnalyzer()));
        registerIfEnabled(SupportedLanguage.en, aConfiguration, configure(new EnglishAnalyzer()));
        registerIfEnabled(SupportedLanguage.es, aConfiguration, configure(new SpanishAnalyzer()));
        registerIfEnabled(SupportedLanguage.eu, aConfiguration, configure(new BasqueAnalyzer()));
        registerIfEnabled(SupportedLanguage.fa, aConfiguration, configure(new PersianAnalyzer()));
        registerIfEnabled(SupportedLanguage.fi, aConfiguration, configure(new FinnishAnalyzer()));
        registerIfEnabled(SupportedLanguage.fr, aConfiguration, configure(new FrenchAnalyzer()));
        registerIfEnabled(SupportedLanguage.ga, aConfiguration, configure(new IrishAnalyzer()));
        registerIfEnabled(SupportedLanguage.gl, aConfiguration, configure(new GalicianAnalyzer()));
        registerIfEnabled(SupportedLanguage.hi, aConfiguration, configure(new HindiAnalyzer()));
        registerIfEnabled(SupportedLanguage.hu, aConfiguration, configure(new HungarianAnalyzer()));
        registerIfEnabled(SupportedLanguage.hy, aConfiguration, configure(new ArmenianAnalyzer()));
        registerIfEnabled(SupportedLanguage.id, aConfiguration, configure(new IndonesianAnalyzer()));
        registerIfEnabled(SupportedLanguage.it, aConfiguration, configure(new ItalianAnalyzer()));
        registerIfEnabled(SupportedLanguage.lv, aConfiguration, configure(new LatvianAnalyzer()));
        registerIfEnabled(SupportedLanguage.nl, aConfiguration, configure(new DutchAnalyzer()));
        registerIfEnabled(SupportedLanguage.no, aConfiguration, configure(new NorwegianAnalyzer()));
        registerIfEnabled(SupportedLanguage.pt, aConfiguration, configure(new PortugueseAnalyzer()));
        registerIfEnabled(SupportedLanguage.ro, aConfiguration, configure(new RomanianAnalyzer()));
        registerIfEnabled(SupportedLanguage.ru, aConfiguration, configure(new RussianAnalyzer()));
        registerIfEnabled(SupportedLanguage.sv, aConfiguration, configure(new SwedishAnalyzer()));
        registerIfEnabled(SupportedLanguage.th, aConfiguration, configure(new ThaiAnalyzer()));
        registerIfEnabled(SupportedLanguage.tr, aConfiguration, configure(new TurkishAnalyzer()));
    }

    private void registerIfEnabled(SupportedLanguage aLanguage, Configuration aConfiguration, Analyzer aAnalyzer) {
        if (aConfiguration.getEnabledLanguages().contains(aLanguage)) {
            analyzerByLanguage.put(aLanguage, aAnalyzer);
        }
    }

    public String getFieldNameFor(SupportedLanguage aLanguage) {
        return FIELD_PREFIX + aLanguage.name();
    }

    public Analyzer getAnalyzer() {
        Map<String, Analyzer> theFieldAnalyzer = new HashMap<>();
        analyzerByLanguage.entrySet().stream()
                .forEach(e -> theFieldAnalyzer.put(getFieldNameFor(e.getKey()), e.getValue()));
        return new PerFieldAnalyzerWrapper(standardAnalyzer, theFieldAnalyzer);
    }

    public boolean supportsLanguage(SupportedLanguage aLanguage) {
        return analyzerByLanguage.containsKey(aLanguage);
    }

    public String[] getAllFieldNames() {
        List<String> theFieldNames = new ArrayList<>();
        theFieldNames.add(IndexFields.CONTENT);
        analyzerByLanguage.entrySet().stream().forEach(e -> theFieldNames.add(getFieldNameFor(e.getKey())));
        return theFieldNames.toArray(new String[theFieldNames.size()]);
    }
}