Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package edu.virginia.cs.index; import edu.virginia.cs.utility.SpecialAnalyzer; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * * @author Wasi */ public class PostLinkIndexer { /** * Creates the initial index files on disk * * @param indexPath * @return * @throws IOException */ private static IndexWriter setupIndex(String indexPath) throws IOException { Analyzer analyzer = new SpecialAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); config.setRAMBufferSizeMB(2048.0); FSDirectory dir; IndexWriter writer = null; dir = FSDirectory.open(new File(indexPath)); writer = new IndexWriter(dir, config); return writer; } /** * @param indexPath Where to create the index * @param prefix The prefix of all the paths in the fileList * @param fileList Each line is a path to a document * @throws IOException */ public static void index(String indexPath, String prefix, String fileList) throws IOException { System.out.println("Creating Lucene index..."); FieldType _contentFieldType = new FieldType(); _contentFieldType.setIndexed(true); _contentFieldType.setStored(true); FieldType _FieldType = new FieldType(); _FieldType.setIndexed(false); _FieldType.setStored(true); IndexWriter writer = setupIndex(indexPath); BufferedReader br = new BufferedReader(new FileReader(prefix + fileList)); String line; int indexed = 0; while ((line = br.readLine()) != null) { String[] splits = line.split("\t"); Document doc = new Document(); doc.add(new Field("id", splits[0], _contentFieldType)); doc.add(new Field("creationDate", splits[1], _FieldType)); doc.add(new Field("postId", splits[2], _FieldType)); doc.add(new Field("relatedPostId", splits[3], _FieldType)); doc.add(new Field("postLinkTypeId", splits[4], _FieldType)); writer.addDocument(doc); ++indexed; if (indexed % 100 == 0) { System.out.println(" -> indexed " + indexed + " docs..."); } } System.out.println(" -> indexed " + indexed + " total docs."); br.close(); writer.close(); } }