com.serendio.lingo3g.CreateLuceneIndex.java Source code

Java tutorial

Introduction

Here is the source code for com.serendio.lingo3g.CreateLuceneIndex.java

Source

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package com.serendio.lingo3g;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.carrot2.core.Document;
import com.serendio.lingo3g.ClusteringDataFromLucene;

/**
 * Creates a Lucene index on disk from the documents in
 * {@link SampleDocumentData#DOCUMENTS_DATA_MINING}.
 *
 * <p>Usage: {@code CreateLuceneIndex index-dir}. The target directory must not
 * exist yet; the program refuses to touch an existing path.
 *
 * <p>Exit codes: {@code -1} when the argument count is wrong, {@code -2} when
 * the index directory already exists.
 *
 * @see ClusteringDataFromLucene
 */
public class CreateLuceneIndex {
    /**
     * Builds the index in the directory named by the single command-line argument.
     *
     * @param args exactly one element: the path of the index directory to create
     * @throws Exception if opening the directory or writing the index fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.out.println("Args: index-dir");
            System.exit(-1);
        }

        File indexDir = new File(args[0]);
        if (indexDir.exists()) {
            System.out.println("Index directory already exists: " + indexDir.getAbsolutePath());
            System.exit(-2);
        }

        /*
         * The analyzer, the directory and the writer are all closeable. Managing them
         * with try-with-resources guarantees they are released (in reverse order of
         * declaration: writer, directory, analyzer) even when adding a document fails.
         */
        try (Analyzer analyzer = new StandardAnalyzer();
             FSDirectory directory = FSDirectory.open(indexDir.toPath());
             IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING) {
                final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
                /*
                 * We will create Lucene documents with searchable "fullContent" field and "title",
                 * "url" and "snippet" fields for clustering.
                 */
                // Indexed for searching only; the text is not stored in the index.
                doc.add(new TextField("fullContent", d.getSummary(), Store.NO));

                // Stored so the values can be read back from search hits.
                doc.add(new TextField("title", d.getTitle(), Store.YES));
                doc.add(new TextField("snippet", d.getSummary(), Store.YES));
                // StringField keeps the URL as a single, untokenized term.
                doc.add(new StringField("url", d.getContentUrl(), Store.YES));
                writer.addDocument(doc);
            }
        }
    }
}