org.elasticsearch.index.analysis.morphology.SimpleMorphologyAnalysisTests.java Source code

Java tutorial

Introduction

Here is the source code for org.elasticsearch.index.analysis.morphology.SimpleMorphologyAnalysisTests.java

Source

/*
 * Copyright 2012 Igor Motov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.elasticsearch.index.analysis.morphology;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.morphology.LuceneMorphology;
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
import org.apache.lucene.morphology.analyzer.MorphologyFilter;
import org.apache.lucene.morphology.english.EnglishAnalyzer;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
import org.apache.lucene.morphology.russian.RussianAnalyzer;
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import java.io.IOException;
import java.io.StringReader;

import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS;
import static org.hamcrest.Matchers.instanceOf;

/**
 *
 */
public class SimpleMorphologyAnalysisTests {

    private AnalysisService getAnalysisService() {
        Index index = new Index("test");

        Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS),
                new EnvironmentModule(new Environment(EMPTY_SETTINGS)), new IndicesAnalysisModule())
                .createInjector();
        Injector injector = new ModulesBuilder()
                .add(new IndexSettingsModule(index, EMPTY_SETTINGS), new IndexNameModule(index),
                        new AnalysisModule(EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class))
                                .addProcessor(new MorphologyAnalysisBinderProcessor()))
                .createChildInjector(parentInjector);

        return injector.getInstance(AnalysisService.class);

    }

    public static void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
        stream.reset();
        CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        Assert.assertNotNull(termAttr);
        int i = 0;
        while (stream.incrementToken()) {
            Assert.assertTrue("got extra term: " + termAttr.toString(), i < expected.length);
            Assert.assertEquals("expected different term at index " + i, termAttr.toString(), expected[i]);
            i++;
        }
        Assert.assertEquals("not all tokens produced", i, expected.length);
    }

    @Test
    public void testMorphologyAnalysis() throws Exception {
        AnalysisService analysisService = getAnalysisService();

        NamedAnalyzer russianAnalyzer = analysisService.analyzer("russian_morphology");
        MatcherAssert.assertThat(russianAnalyzer.analyzer(), instanceOf(RussianAnalyzer.class));
        assertSimpleTSOutput(russianAnalyzer.tokenStream("test", new StringReader("?")),
                new String[] { "?", "?" });

        NamedAnalyzer englishAnalyzer = analysisService.analyzer("english_morphology");
        MatcherAssert.assertThat(englishAnalyzer.analyzer(), instanceOf(EnglishAnalyzer.class));
        assertSimpleTSOutput(englishAnalyzer.tokenStream("test", new StringReader("gone")),
                new String[] { "gone", "go" });
    }

    @Test
    @Ignore(" ? ?")
    public void testPm() throws Exception {
        LuceneMorphology russianLuceneMorphology = new RussianLuceneMorphology();
        LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();

        MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
        TokenStream stream = russianAnalyzer.tokenStream("name", new FastStringReader("?  ?"));
        MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
        assertSimpleTSOutput(englishFilter,
                new String[] { "?", "?", "", "?", "?" });
    }

}