cn.fql.blogspider.IndexMain.java Source code

Java tutorial

Introduction

Here is the source code for cn.fql.blogspider.IndexMain.java

Source

/*
 *
 * Copyright (c) 2009 Wincor Nixdorf International GmbH,
 * Heinz-Nixdorf-Ring 1, 33106 Paderborn, Germany
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information
 * of Wincor Nixdorf ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered
 * into with Wincor Nixdorf.
 */
package cn.fql.blogspider;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.*;
import java.util.Date;

/**
 * cn.fql.blogspider
 *
 * @author Fu, quanlin, WN ASP SSD
 * @version $Revision$
 */
public class IndexMain {

    public static void main(String[] args) {

        String indexPath = "d:/test/index";
        String docsPath = "d:/test/docs";
        boolean create = true;

        File docDir = new File(docsPath);

        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + indexPath + "'...");

            Directory dir = FSDirectory.open(new File(indexPath));
            //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            Analyzer analyzer = new IKAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

            if (create) {
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }

            IndexWriter writer = new IndexWriter(dir, iwc);
            indexDocs(writer, docDir);

            writer.close();

            Date end = new Date();
            System.out.println((end.getTime() - start.getTime()) + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

    static void indexDocs(IndexWriter writer, File file) throws IOException {
        if (file.canRead())
            if (file.isDirectory()) {
                String[] files = file.list();

                if (files != null)
                    for (int i = 0; i < files.length; ++i)
                        indexDocs(writer, new File(file, files[i]));
            } else {
                FileInputStream fis;
                try {
                    fis = new FileInputStream(file);
                } catch (FileNotFoundException fnfe) {
                    return;
                }

                try {
                    Document doc = new Document();

                    Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                    doc.add(pathField);

                    doc.add(new LongField("modified", file.lastModified(), Field.Store.YES));

                    doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                    if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
                        System.out.println("adding " + file);
                        writer.addDocument(doc);
                    } else {
                        System.out.println("updating " + file);
                        writer.updateDocument(new Term("path", file.getPath()), doc);
                    }
                } finally {
                    fis.close();
                }
            }
    }
}
/**
 * History:
 *
 *  : cn.fql.blogspider  $
 */