com.xiaomi.linden.lucene.analyzer.TestLindenWordDelimiterAnalyzer.java Source code

Introduction

Here is the source code for com.xiaomi.linden.lucene.analyzer.TestLindenWordDelimiterAnalyzer.java, a JUnit test that exercises Linden's LindenWordDelimiterAnalyzerFactory both through the search core and directly against the analyzer.

Source

// Copyright 2016 Xiaomi, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.xiaomi.linden.lucene.analyzer;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Assert;
import org.junit.Test;

import com.xiaomi.linden.bql.BQLCompiler;
import com.xiaomi.linden.core.LindenConfig;
import com.xiaomi.linden.core.TestLindenCoreBase;
import com.xiaomi.linden.thrift.common.LindenFieldSchema;
import com.xiaomi.linden.thrift.common.LindenResult;
import com.xiaomi.linden.thrift.common.LindenSchema;
import com.xiaomi.linden.thrift.common.LindenSearchRequest;

public class TestLindenWordDelimiterAnalyzer extends TestLindenCoreBase {

    public TestLindenWordDelimiterAnalyzer() throws Exception {
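        // Index three sample documents into the "title" field, commit and refresh the core,
        // then build a BQL compiler from the schema.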
        try {
            handleRequest("{\"id\":1, \"title\": [\"?\"]}");
            handleRequest("{\"id\":2, \"title\": [\"?\"]}");
            handleRequest(
                    "{\"id\":3, \"title\": [\"?!!!\", \"\", \"\", \"??\", \"?\", \"\"]}");
            lindenCore.commit();
            lindenCore.refresh();
            bqlCompiler = new BQLCompiler(lindenConfig.getSchema());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void init() {
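        // In-memory (RAM) index with a single multi-value, tokenized, snippet-enabled "title" field;
        // the word-delimiter analyzer is configured for both indexing and search.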
        lindenConfig.setIndexType(LindenConfig.IndexType.RAM);
        lindenConfig.setClusterUrl("127.0.0.1:2181/mock");
        LindenSchema schema = new LindenSchema().setId("id");
        schema.addToFields(new LindenFieldSchema().setName("title").setIndexed(true).setTokenized(true)
                .setSnippet(true).setMulti(true));
        lindenConfig.setSchema(schema);
        lindenConfig.putToProperties("search.analyzer.class",
                "com.xiaomi.linden.lucene.analyzer.LindenWordDelimiterAnalyzerFactory");
        lindenConfig.putToProperties("index.analyzer.class",
                "com.xiaomi.linden.lucene.analyzer.LindenWordDelimiterAnalyzerFactory");
        lindenConfig.putToProperties("index.analyzer.luceneMatchVersion", "LUCENE_4_10_0");
        lindenConfig.putToProperties("search.analyzer.luceneMatchVersion", "LUCENE_4_10_0");
    }

    @Test
    public void testIndexMode() throws IOException {
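        // Check hit counts for a term query, a phrase query, and snippet generation on "title".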
        String bql = "select * from linden by query is \"title:?\"";
        LindenSearchRequest request = bqlCompiler.compile(bql).getSearchRequest();
        LindenResult result = lindenCore.search(request);
        Assert.assertEquals(3, result.getTotalHits());

        // phrase test
        bql = "select * from linden by query is 'title:\"?\"'";
        request = bqlCompiler.compile(bql).getSearchRequest();
        result = lindenCore.search(request);
        Assert.assertEquals(2, result.getTotalHits());
        bql = "select * from linden by query is 'title:\"?\"'";
        request = bqlCompiler.compile(bql).getSearchRequest();
        result = lindenCore.search(request);
        Assert.assertEquals(2, result.getTotalHits());

        // snippet test
        bql = "select * from linden by query is 'title:(?)' snippet title";
        request = bqlCompiler.compile(bql).getSearchRequest();
        result = lindenCore.search(request);
        Assert.assertEquals(3, result.getTotalHits());
        Assert.assertEquals(
                "<b></b><b></b><b></b><b></b><b></b><b>?</b><b></b>!!! <b></b><b></b>  ?? ?<b></b> ",
                result.getHits().get(0).getSnippets().get("title").getSnippet());
        Assert.assertEquals("<b></b><b></b><b></b><b></b><b></b><b>?</b><b></b>",
                result.getHits().get(1).getSnippets().get("title").getSnippet());

    }

    @Test
    public void testLindenWordDelimiterAnalyzer() throws Exception {
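        // Drive the analyzer factory directly with different option maps and compare the emitted tokens.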
        LindenWordDelimiterAnalyzerFactory wordDelimiterAnalyzerFactory = new LindenWordDelimiterAnalyzerFactory();
        Map<String, String> args = new HashMap<>();
        Map<String, String> lastargs = new HashMap<>();
        args.put("luceneMatchVersion", "LUCENE_4_10_0");
        lastargs.putAll(args);
        Analyzer analyzer = wordDelimiterAnalyzerFactory.getInstance(args);
        TokenStream stream = analyzer.tokenStream("", new StringReader("Hello, this is a test case. "
                + "" + "created2018by sls sun-li-shun SunLiShun"));
        String expected = "[hello][test][case][][][][][][][][][][][][created][2018][sls][sun][li][shun][sun][li][shun]";
        String out = "";
        stream.reset();
        while (stream.incrementToken()) {
            out += "[" + stream.getAttribute(CharTermAttribute.class).toString() + "]";
        }
        Assert.assertEquals(expected, out);

        args.put("lower.case", "false");
        args.putAll(lastargs);
        lastargs.putAll(args);
        analyzer = wordDelimiterAnalyzerFactory.getInstance(args);
        stream = analyzer.tokenStream("", new StringReader("Hello, this is a test case. "
                + "" + "created2018by sls on 20140707"));
        expected = "[Hello][test][case][][][][][][][][][][][][created][2018][sls][20140707]";
        out = "";
        stream.reset();
        while (stream.incrementToken()) {
            out += "[" + stream.getAttribute(CharTermAttribute.class).toString() + "]";
        }
        Assert.assertEquals(expected, out);

        args.put("set.stopwords", "false");
        args.putAll(lastargs);
        lastargs.putAll(args);
        analyzer = wordDelimiterAnalyzerFactory.getInstance(args);
        stream = analyzer.tokenStream("", new StringReader("Hello, this is a test case. "
                + "" + "created2018by sls on 20140707"));
        expected = "[Hello][this][is][a][test][case][][][][][][][][][][][][created][2018][by][sls][on][20140707]";
        out = "";
        stream.reset();
        while (stream.incrementToken()) {
            out += "[" + stream.getAttribute(CharTermAttribute.class).toString() + "]";
        }
        Assert.assertEquals(expected, out);

        args.putAll(lastargs);
        args.put("splitOnCaseChange", "0");
        args.put("set.stopwords", "false");
        args.put("lower.case", "true");
        lastargs.putAll(args);
        analyzer = wordDelimiterAnalyzerFactory.getInstance(args);
        stream = analyzer.tokenStream("", new StringReader("Hello, this is a test case. "
                + "" + "created2018by sls sun-li-shun SunLiShun"));
        expected = "[hello][this][is][a][test][case][][][][][][][][][][][][created][2018][by][sls][sun][li][shun][sunlishun]";
        out = "";
        stream.reset();
        while (stream.incrementToken()) {
            out += "[" + stream.getAttribute(CharTermAttribute.class).toString() + "]";
        }
        Assert.assertEquals(expected, out);
    }
}
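
Usage sketch

The test above drives LindenWordDelimiterAnalyzerFactory only through JUnit. As a point of reference, the following is a minimal standalone sketch (not part of the original source) showing the same factory call outside a test. The getInstance(Map) call and the option names (luceneMatchVersion, lower.case, set.stopwords, splitOnCaseChange) are taken from the test; the token-stream consumption is standard Lucene 4.x API, and the class name WordDelimiterAnalyzerDemo is invented for illustration.

import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import com.xiaomi.linden.lucene.analyzer.LindenWordDelimiterAnalyzerFactory;

// Hypothetical demo class, not part of the Linden code base.
public class WordDelimiterAnalyzerDemo {

    public static void main(String[] args) throws Exception {
        Map<String, String> options = new HashMap<>();
        // Only the match version is required here; lower.case, set.stopwords and
        // splitOnCaseChange can be added exactly as in the test above.
        options.put("luceneMatchVersion", "LUCENE_4_10_0");
        Analyzer analyzer = new LindenWordDelimiterAnalyzerFactory().getInstance(options);

        // Consume the token stream the same way the test does.
        TokenStream stream = analyzer.tokenStream("title",
                new StringReader("sun-li-shun SunLiShun created2018by"));
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            // With the defaults, the test above expects lowercased tokens such as
            // sun, li, shun, created, 2018 (stopwords like "by" are dropped).
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }
}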