Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.virginia.cs.index;

import edu.virginia.cs.utility.SpecialAnalyzer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from a tab-separated posts file, keeping only the rows
 * whose second column (the post type) equals {@code 2}.
 *
 * @author Wasi
 */
public class AnswerIndexer {

    private static final Logger LOGGER = Logger.getLogger(AnswerIndexer.class.getName());

    /**
     * Post-type value (second column) selecting a row for indexing.
     * NOTE(review): presumably "answer" posts, going by the class name - confirm
     * against the tool that produces the input file.
     */
    private static final int INDEXED_POST_TYPE = 2;

    /** Number of tab-separated columns an indexed row must supply (columns 0 to 14). */
    private static final int REQUIRED_COLUMNS = 15;

    /**
     * Creates the initial index files on disk.
     *
     * <p>The writer is opened in {@code OpenMode.CREATE} mode with a
     * {@link SpecialAnalyzer} and a 2048 MB RAM buffer.
     *
     * @param indexPath directory in which the index is created
     * @return an open writer on {@code indexPath}; the caller must close it or roll it back
     * @throws IOException if the directory or the writer cannot be opened
     */
    private static IndexWriter setupIndex(String indexPath) throws IOException {
        Analyzer analyzer = new SpecialAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setRAMBufferSizeMB(2048.0);

        FSDirectory dir = FSDirectory.open(new File(indexPath));
        return new IndexWriter(dir, config);
    }

    /**
     * Reads the file {@code prefix + fileList} line by line and indexes every row
     * whose post type is {@code 2}.
     *
     * <p>Each line is a tab-separated record. Column 0 is the id, column 1 the post
     * type, and columns 2 to 14 are parentId, acceptedAnswerId, creationDate, score,
     * viewCount, body, code, ownerId, title, tags, answerCount, commentCount and
     * favoriteCount. Empty columns are indexed as empty strings. Rows that cannot be
     * interpreted (missing post type, non-numeric post type, or fewer than 15 columns
     * on a selected row) are skipped with a logged warning instead of aborting the run.
     *
     * <p>If reading or indexing fails, the writer is rolled back rather than closed,
     * so no partial index is committed.
     *
     * @param indexPath Where to create the index
     * @param prefix Prepended to {@code fileList} to form the input file path
     * @param fileList Name of the tab-separated input file, relative to {@code prefix}
     * @throws IOException if the input cannot be read or the index cannot be written
     */
    public static void index(String indexPath, String prefix, String fileList) throws IOException {
        System.out.println("Creating Lucene index...");

        // Indexed and stored: used for id, body and code.
        FieldType searchableType = new FieldType();
        searchableType.setIndexed(true);
        searchableType.setStored(true);

        // Stored only: kept with the document but not indexed.
        FieldType storedOnlyType = new FieldType();
        storedOnlyType.setIndexed(false);
        storedOnlyType.setStored(true);

        // Open the input before the writer, so a missing input file fails before a
        // CREATE-mode writer is opened on the index directory.
        // NOTE(review): FileReader decodes with the platform default charset - confirm
        // that this matches the encoding of the input file.
        try (BufferedReader br = new BufferedReader(new FileReader(prefix + fileList))) {
            IndexWriter writer = setupIndex(indexPath);
            boolean completed = false;
            try {
                int indexed = 0;
                int skipped = 0;
                int lineNumber = 0;
                String line;
                while ((line = br.readLine()) != null) {
                    ++lineNumber;
                    // Limit -1 keeps trailing empty columns; the one-argument split
                    // drops them, which shortened the array for such rows.
                    String[] fields = line.split("\t", -1);

                    if (fields.length < 2) {
                        LOGGER.log(Level.WARNING, "Skipping line {0}: no post type column",
                                Integer.toString(lineNumber));
                        ++skipped;
                        continue;
                    }

                    int postType;
                    try {
                        postType = Integer.parseInt(fields[1]);
                    } catch (NumberFormatException e) {
                        LOGGER.log(Level.WARNING, "Skipping line {0}: non-numeric post type ''{1}''",
                                new Object[] {Integer.toString(lineNumber), fields[1]});
                        ++skipped;
                        continue;
                    }

                    if (postType != INDEXED_POST_TYPE) {
                        continue;
                    }

                    if (fields.length < REQUIRED_COLUMNS) {
                        LOGGER.log(Level.WARNING, "Skipping line {0}: expected {1} columns, found {2}",
                                new Object[] {Integer.toString(lineNumber),
                                    Integer.toString(REQUIRED_COLUMNS),
                                    Integer.toString(fields.length)});
                        ++skipped;
                        continue;
                    }

                    writer.addDocument(toDocument(fields, searchableType, storedOnlyType));
                    ++indexed;
                    if (indexed % 100 == 0) {
                        System.out.println(" -> indexed " + indexed + " docs...");
                    }
                }

                System.out.println(" -> indexed " + indexed + " total docs.");
                if (skipped > 0) {
                    LOGGER.log(Level.WARNING, "Skipped {0} malformed line(s)", Integer.toString(skipped));
                }
                completed = true;
            } finally {
                if (completed) {
                    writer.close();
                } else {
                    rollbackQuietly(writer);
                }
            }
        }
    }

    /**
     * Builds the Lucene document for one row that has at least 15 columns.
     *
     * @param fields the tab-separated columns of the row
     * @param searchableType field type applied to id, body and code
     * @param storedOnlyType field type applied to every other column
     * @return the document holding the 14 fields of the row
     */
    private static Document toDocument(String[] fields, FieldType searchableType, FieldType storedOnlyType) {
        Document doc = new Document();
        doc.add(new Field("id", fields[0], searchableType));
        doc.add(new Field("parentId", fields[2], storedOnlyType));
        doc.add(new Field("acceptedAnswerId", fields[3], storedOnlyType));
        doc.add(new Field("creationDate", fields[4], storedOnlyType));
        doc.add(new Field("score", fields[5], storedOnlyType));
        doc.add(new Field("viewCount", fields[6], storedOnlyType));
        doc.add(new Field("body", fields[7], searchableType));
        doc.add(new Field("code", fields[8], searchableType));
        doc.add(new Field("ownerId", fields[9], storedOnlyType));
        doc.add(new Field("title", fields[10], storedOnlyType));
        doc.add(new Field("tags", fields[11], storedOnlyType));
        doc.add(new Field("answerCount", fields[12], storedOnlyType));
        doc.add(new Field("commentCount", fields[13], storedOnlyType));
        doc.add(new Field("favoriteCount", fields[14], storedOnlyType));
        return doc;
    }

    /**
     * Rolls the writer back after a failed run. A failure of the rollback itself is
     * only logged, so that the exception that caused the rollback still reaches the caller.
     *
     * @param writer the writer to roll back
     */
    private static void rollbackQuietly(IndexWriter writer) {
        try {
            writer.rollback();
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to roll back the index writer", e);
        }
    }
}