// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package net.skyatlas.icd.dao.daoImpl; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.Date; import java.util.HashSet; import java.util.ResourceBundle; import love.cq.util.IOUtil; import org.ansj.lucene.util.PorterStemmer; import org.ansj.lucene4.AnsjAnalysis; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.junit.Test; /** * * @author changzhenghe */ public class AnsjAnalysisTest { @Test public void test() throws IOException { Token nt = new Token(); Analyzer ca = new AnsjAnalysis(); Reader sentence = new StringReader( "\n\n\n\n\n\n\n????, ????????????????????????????" + "???????????????????" + "??????????? ??????????????2????" + "" + "? ????????????? ??? 
????????"); TokenStream ts = ca.tokenStream("sentence", sentence); System.out.println("start: " + (new Date())); long before = System.currentTimeMillis(); while (ts.incrementToken()) { System.out.println(ts.getAttribute(CharTermAttribute.class)); } ts.close(); long now = System.currentTimeMillis(); System.out.println("time: " + (now - before) / 1000.0 + " s"); } @Test public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException { HashSet<String> hs = new HashSet<String>(); // BufferedReader reader2 = IOUtil.getReader(ResourceBundle.getBundle("library").getString("stopLibrary"), "UTF-8"); // String word = null; // while ((word = reader2.readLine()) != null) { // hs.add(word); // } Analyzer analyzer = new AnsjAnalysis(hs, false); Directory directory = null; IndexWriter iwriter = null; // BufferedReader reader = IOUtil.getReader("/Users/ansj/Desktop/??/indextest.txt", "UTF-8"); // String temp = null; // StringBuilder sb = new StringBuilder(); // while ((temp = reader.readLine()) != null) { // sb.append(temp); // sb.append("\n"); // } // reader.close(); String text = "???????????? ??? ????????"; IndexWriterConfig ic = new IndexWriterConfig(Version.LUCENE_32, analyzer); // directory = new RAMDirectory(); iwriter = new IndexWriter(directory, ic); // BufferedReader reader = // IOUtil.getReader("/Users/ansj/Documents//?//1998?_.txt", // "GBK"); // String temp = null; // while ((temp = reader.readLine()) != null) { // addContent(iwriter, temp); // } addContent(iwriter, text); addContent(iwriter, text); addContent(iwriter, text); addContent(iwriter, text); iwriter.commit(); iwriter.close(); System.out.println(""); // search(analyzer, directory, ""); } private void addContent(IndexWriter iwriter, String text) throws CorruptIndexException, IOException { Document doc = new Document(); doc.add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); } }