net.skyatlas.icd.dao.daoImpl.AnsjAnalysisTest.java Source code

Java tutorial

Introduction

Here is the source code for net.skyatlas.icd.dao.daoImpl.AnsjAnalysisTest.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package net.skyatlas.icd.dao.daoImpl;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Date;
import java.util.HashSet;
import java.util.ResourceBundle;
import love.cq.util.IOUtil;
import org.ansj.lucene.util.PorterStemmer;
import org.ansj.lucene4.AnsjAnalysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Smoke tests for the {@link AnsjAnalysis} Lucene analyzer.
 *
 * <p>Neither test makes assertions: each one passes as long as tokenizing or
 * indexing completes without throwing. Output is written to standard out for
 * manual inspection.
 *
 * @author changzhenghe
 */
public class AnsjAnalysisTest {

    /**
     * Tokenizes a fixed sentence with a default {@link AnsjAnalysis} and prints
     * every term plus the elapsed wall-clock time.
     *
     * @throws IOException if the token stream fails while being consumed or closed
     */
    @Test
    public void test() throws IOException {
        Analyzer ca = new AnsjAnalysis();
        try {
            Reader sentence = new StringReader(
                    "\n\n\n\n\n\n\n????, ????????????????????????????"
                            + "???????????????????"
                            + "??????????? ??????????????2????"
                            + ""
                            + "? ?????????????  ??? ????????");
            TokenStream ts = ca.tokenStream("sentence", sentence);
            try {
                // Look the attribute up once; the same instance is updated
                // in place on every successful incrementToken() call.
                CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);

                System.out.println("start: " + (new Date()));
                long before = System.currentTimeMillis();
                // Follow the TokenStream consumer workflow:
                // reset() -> incrementToken()* -> end() -> close().
                ts.reset();
                while (ts.incrementToken()) {
                    System.out.println(term);
                }
                ts.end();
                long now = System.currentTimeMillis();
                System.out.println("time: " + (now - before) / 1000.0 + " s");
            } finally {
                // Release the stream even when tokenizing fails part-way.
                ts.close();
            }
        } finally {
            ca.close();
        }
    }

    /**
     * Indexes the same short text four times into an in-memory
     * {@link RAMDirectory} using an {@link AnsjAnalysis} built with an empty
     * stop-word set, then commits.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on any other low-level I/O failure
     */
    @Test
    public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException {
        // Stop words are deliberately left empty. An earlier revision filled this
        // set line by line from the file named by the "stopLibrary" key of the
        // "library" resource bundle.
        HashSet<String> hs = new HashSet<String>();
        Analyzer analyzer = new AnsjAnalysis(hs, false);
        try {
            // Fixed sample text; earlier revisions read it from developer-local files.
            String text = "????????????  ??? ????????";

            IndexWriterConfig ic = new IndexWriterConfig(Version.LUCENE_32, analyzer);
            Directory directory = new RAMDirectory();
            try {
                IndexWriter iwriter = new IndexWriter(directory, ic);
                try {
                    addContent(iwriter, text);
                    addContent(iwriter, text);
                    addContent(iwriter, text);
                    addContent(iwriter, text);
                    iwriter.commit();
                } finally {
                    // Always release the write lock, even if indexing failed.
                    iwriter.close();
                }
            } finally {
                directory.close();
            }
        } finally {
            analyzer.close();
        }

        System.out.println("");
    }

    /**
     * Adds one document to the index, holding {@code text} in a stored,
     * analyzed field named {@code "text"}.
     *
     * @param iwriter writer the document is added to
     * @param text    content of the document's {@code "text"} field
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O failure
     */
    private void addContent(IndexWriter iwriter, String text) throws CorruptIndexException, IOException {
        Document doc = new Document();
        doc.add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
    }
}