index.Indexcategory.java Source code

Java tutorial

Introduction

Here is the source code for index.Indexcategory.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package index;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Scanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from a DBpedia N-Quads style dump.
 *
 * <p>Each input line is split on {@code "> "}. The first token (the subject
 * resource URI with its {@code <http://dbpedia.org/resource/} prefix removed)
 * is stored in the {@code article} field, and the third token is stored
 * verbatim in the {@code category} field. The first line of the input is
 * skipped.
 *
 * @author smita
 */
public class Indexcategory {

    /** Index directory used when no command-line argument is supplied. */
    private static final String DEFAULT_INDEX_PATH = "/Users/smita/Documents/ES/index/abstract/";

    /** Input dump used when no second command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH =
            "/Users/smita/Documents/data/dbpedia/short_abstracts_en.nq";

    /** Prefix expected at the start of every subject token (29 characters). */
    private static final String RESOURCE_PREFIX = "<http://dbpedia.org/resource/";

    /** Separator between the terms of one input line. */
    private static final String FIELD_SEPARATOR = "> ";

    /**
     * Reads the input dump line by line and adds one document per well-formed
     * line to the index.
     *
     * @param args optional overrides: {@code args[0]} is the index directory,
     *             {@code args[1]} is the input file; the built-in defaults are
     *             used for any argument that is missing
     * @throws IOException if the index cannot be opened or written, or the
     *                     input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String indexPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_PATH;
        String path = (args != null && args.length > 1) ? args[1] : DEFAULT_INPUT_PATH;
        boolean create = true;

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        } else {
            // Documents are appended; re-running over the same input will
            // therefore add duplicates.
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        }

        // try-with-resources closes everything in reverse order even when
        // reading or indexing fails, so the index write lock is released.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
             IndexWriter writer = new IndexWriter(dir, iwc);
             FileInputStream inputStream = new FileInputStream(path);
             Scanner sc = new Scanner(inputStream, "UTF-8")) {

            // The first line of the dump is not a data row; skip it, but do
            // not fail on an empty file.
            if (sc.hasNextLine()) {
                sc.nextLine();
            }

            int linecount = 0;
            int skipped = 0;
            while (sc.hasNextLine()) {
                linecount++;
                Document doc = toDocument(sc.nextLine());
                if (doc == null) {
                    // Malformed rows are skipped (best effort), but reported
                    // instead of being silently dropped.
                    skipped++;
                    System.err.println("skipping malformed line " + linecount);
                    continue;
                }
                System.out.println("adding " + linecount);
                // An IOException here means the index itself is failing, so
                // it is allowed to propagate rather than being swallowed.
                writer.addDocument(doc);
            }

            // Scanner hides read errors; surface them explicitly.
            if (sc.ioException() != null) {
                throw sc.ioException();
            }
            if (skipped > 0) {
                System.err.println("skipped " + skipped + " of " + linecount + " lines");
            }
        }
    }

    /**
     * Converts one input line into an index document.
     *
     * @param line a raw line from the dump
     * @return the document, or {@code null} if the line has fewer than three
     *         tokens, its subject lacks the expected resource prefix, or the
     *         resource name is empty
     */
    private static Document toDocument(String line) {
        String[] parts = line.split(FIELD_SEPARATOR);
        if (parts.length < 3 || !parts[0].startsWith(RESOURCE_PREFIX)) {
            return null;
        }

        // Splitting on "> " already removed the closing '>', so only the
        // prefix is stripped; trimming one more character from the end would
        // cut off the last letter of the resource name.
        String article = parts[0].substring(RESOURCE_PREFIX.length());
        if (article.isEmpty()) {
            return null;
        }
        String category = parts[2];

        Document doc = new Document();
        doc.add(new TextField("article", article, Field.Store.YES));
        doc.add(new TextField("category", category, Field.Store.YES));
        return doc;
    }

}