indexer.IndexSplitter.java Source code

Java tutorial

Introduction

Here is the source code for indexer.IndexSplitter.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package indexer;

import static indexer.TweetIndexer.FIELD_ANALYZED_CONTENT;
import static indexer.TweetIndexer.FIELD_CODEMIXED;
import static indexer.TweetIndexer.FIELD_ID;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;

/**
 * Splits the index located at {@code indexDir} into two separate indexes:
 * documents whose {@code FIELD_CODEMIXED} value is {@code "1"} go to the
 * "mixed" index, every other document goes to the "pure" index.
 *
 * <p>The target directories are read from the properties
 * {@code splitindex.pure} and {@code splitindex.mixed}. Both target indexes
 * are opened in {@code CREATE} mode, so an index already present in either
 * directory is replaced once the split commits.
 *
 * <p>NOTE(review): documents are copied via {@code IndexReader.document(int)},
 * which only returns stored fields; per the Lucene documentation, index-time
 * metadata of the original fields is not preserved. Confirm this is acceptable
 * for the consumers of the split indexes.
 *
 * @author Debasis
 */
public class IndexSplitter extends TweetIndexer {
    /** Property key naming the directory of the "pure" target index. */
    private static final String PROP_PURE_INDEX = "splitindex.pure";
    /** Property key naming the directory of the "mixed" target index. */
    private static final String PROP_MIXED_INDEX = "splitindex.mixed";

    /** Receives every document that is not flagged as code-mixed. */
    IndexWriter pureIndexWriter;
    /** Receives every document whose code-mixed field equals "1". */
    IndexWriter mixedIndexWriter;

    /**
     * Opens one writer per target index.
     *
     * @param propFile path of the properties file, passed on to the superclass
     * @throws IllegalArgumentException if one of the target-directory properties is missing
     * @throws Exception if the superclass initialisation or opening a writer fails
     */
    public IndexSplitter(String propFile) throws Exception {
        super(propFile);

        File pureIndexDir = new File(requireProperty(PROP_PURE_INDEX));
        File mixedIndexDir = new File(requireProperty(PROP_MIXED_INDEX));

        // An IndexWriterConfig must not be shared between writers, hence one per index.
        IndexWriterConfig iwcfg_pure = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        iwcfg_pure.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriterConfig iwcfg_mixed = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        iwcfg_mixed.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        pureIndexWriter = new IndexWriter(FSDirectory.open(pureIndexDir), iwcfg_pure);
        try {
            mixedIndexWriter = new IndexWriter(FSDirectory.open(mixedIndexDir), iwcfg_mixed);
        } catch (Exception ex) {
            // Do not leave the first writer (and its directory lock) dangling.
            pureIndexWriter.rollback();
            throw ex;
        }
    }

    /**
     * Copies every live document of the source index into either the mixed or
     * the pure index and then closes both writers.
     *
     * <p>On success both writers are closed (which commits them). If anything
     * fails, both writers are rolled back so that no partial split is
     * committed, and the source reader is closed in either case.
     *
     * @throws Exception if reading the source index or writing a target index fails
     */
    public void split() throws Exception {
        boolean success = false;
        try {
            IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir));
            try {
                copyDocuments(reader);
            } finally {
                reader.close();
            }
            success = true;
        } finally {
            releaseWriters(success);
        }
    }

    /** Routes each live document of {@code reader} to the matching target writer. */
    private void copyDocuments(IndexReader reader) throws Exception {
        // Doc IDs range over [0, maxDoc); numDocs() is smaller when the index
        // contains deletions, so iterating up to numDocs() would miss documents.
        final int maxDoc = reader.maxDoc();
        // null means the index has no deletions.
        final Bits liveDocs = MultiFields.getLiveDocs(reader);

        for (int docId = 0; docId < maxDoc; docId++) {
            if (liveDocs != null && !liveDocs.get(docId)) {
                continue; // deleted document
            }
            Document d = reader.document(docId);
            // Constant-first equals: a document without the field counts as pure
            // instead of triggering a NullPointerException.
            IndexWriter target = "1".equals(d.get(FIELD_CODEMIXED)) ? mixedIndexWriter : pureIndexWriter;
            target.addDocument(d);
        }
    }

    /** Closes (commit) or rolls back both writers; the second is handled even if the first fails. */
    private void releaseWriters(boolean commit) throws Exception {
        try {
            releaseWriter(pureIndexWriter, commit);
        } finally {
            releaseWriter(mixedIndexWriter, commit);
        }
    }

    /** Closes {@code writer} when {@code commit} is true, otherwise discards its pending changes. */
    private static void releaseWriter(IndexWriter writer, boolean commit) throws Exception {
        if (commit) {
            writer.close();
        } else {
            writer.rollback();
        }
    }

    /** Returns the value of property {@code key}, failing with a clear message if it is absent. */
    private String requireProperty(String key) {
        String value = prop.getProperty(key);
        if (value == null) {
            throw new IllegalArgumentException("Missing required property: " + key);
        }
        return value;
    }

    /**
     * Returns the analyzer, creating it through {@code TweetAnalyzerFactory} on first use.
     *
     * @return the cached analyzer instance
     */
    @Override
    Analyzer getAnalyzer() {
        if (analyzer != null) {
            return analyzer;
        }
        // Original note: "We want bigrams here".
        // NOTE(review): the flag passed is false; confirm against
        // TweetAnalyzerFactory.createAnalyzer that this matches the note.
        analyzer = TweetAnalyzerFactory.createAnalyzer(prop, false);
        return analyzer;
    }

    /**
     * Command-line entry point. Without arguments the usage line is printed and
     * {@code init.properties} is used as the properties file.
     *
     * @param args optional single argument: path of the properties file
     */
    public static void main(String[] args) {

        if (args.length == 0) {
            args = new String[1];
            System.out.println("Usage: java IndexSplitter <prop-file>");
            args[0] = "init.properties";
        }

        try {
            IndexSplitter indexer = new IndexSplitter(args[0]);
            indexer.split();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}