cn.fql.blogspider.SearchMain.java Source code

Java tutorial

Introduction

Here is the source code for cn.fql.blogspider.SearchMain.java

Source

/*
 *
 * Copyright (c) 2009 Wincor Nixdorf International GmbH,
 * Heinz-Nixdorf-Ring 1, 33106 Paderborn, Germany
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information
 * of Wincor Nixdorf ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered
 * into with Wincor Nixdorf.
 */
package cn.fql.blogspider;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.*;
import java.util.Date;

/**
 * Command-line search front end for a Lucene index.
 *
 * <p>Queries are read from the {@code -query} option, from a UTF-8 file given
 * with {@code -queries}, or interactively from standard input. Each query is
 * parsed with an {@link IKAnalyzer} against the configured field and the
 * stored {@code path} field of every hit is printed page by page.
 *
 * @author Fu, quanlin, WN ASP SSD
 * @version $Revision$
 */
public class SearchMain {
    /** Index directory used when {@code -index} is not given. */
    private static final String DEFAULT_INDEX = "D:\\test\\index";

    /** Field searched when {@code -field} is not given. */
    private static final String DEFAULT_FIELD = "contents";

    /** Page size used when {@code -paging} is not given. */
    private static final int DEFAULT_HITS_PER_PAGE = 10;

    /** Marker returned by the navigation prompt when the user wants to stop paging. */
    private static final int QUIT = -1;

    private static final String USAGE = "Usage:\tjava cn.fql.blogspider.SearchMain [-index dir] [-field f] [-queries file] [-query string] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";

    /**
     * Runs the search tool.
     *
     * <p>Supported options: {@code -index dir}, {@code -field f},
     * {@code -queries file}, {@code -query string} and
     * {@code -paging hitsPerPage}. Unknown arguments are ignored. With no
     * arguments the built-in defaults are used and queries are read
     * interactively from standard input until an empty line or end of input.
     *
     * @param args command-line arguments
     * @throws Exception if the index cannot be opened, a query cannot be parsed
     *                   or an I/O error occurs
     */
    public static void main(String[] args) throws Exception {
        if ((args.length > 0) && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
            System.out.println(USAGE);
            System.exit(0);
        }

        String index = DEFAULT_INDEX;
        String field = DEFAULT_FIELD;
        String queries = null;
        String queryString = null;
        int hitsPerPage = DEFAULT_HITS_PER_PAGE;

        for (int i = 0; i < args.length; ++i) {
            String option = args[i];
            if ("-index".equals(option)) {
                index = optionValue(args, i);
                ++i;
            } else if ("-field".equals(option)) {
                field = optionValue(args, i);
                ++i;
            } else if ("-queries".equals(option)) {
                queries = optionValue(args, i);
                ++i;
            } else if ("-query".equals(option)) {
                queryString = optionValue(args, i);
                ++i;
            } else if ("-paging".equals(option)) {
                hitsPerPage = parseHitsPerPage(optionValue(args, i));
                ++i;
            }
        }

        // Prompts and paging navigation are only shown when queries are typed by hand.
        boolean interactive = (queries == null) && (queryString == null);

        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
        BufferedReader queryFile = null;
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            Analyzer analyzer = new IKAnalyzer();

            BufferedReader in;
            if (queries != null) {
                queryFile = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
                in = queryFile;
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }

            QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
            while (true) {
                if (interactive) {
                    System.out.println("Enter query: ");
                }

                String line = (queryString != null) ? queryString : in.readLine();
                if (line == null) {
                    break; // end of input
                }

                line = line.trim();
                if (line.length() == 0) {
                    break; // a blank line ends the session
                }

                Query query = parser.parse(line);
                System.out.println("Searching for: " + query.toString(field));

                doPagingSearch(in, searcher, query, hitsPerPage, interactive);

                if (queryString != null) {
                    break; // a -query string is executed exactly once
                }
            }
        } finally {
            // Release the index even when parsing or searching failed. Only the
            // queries file is closed here; the System.in wrapper is left open.
            try {
                if (queryFile != null) {
                    queryFile.close();
                }
            } finally {
                reader.close();
            }
        }
    }

    /**
     * Executes {@code query} and prints the hits one page at a time.
     *
     * <p>Initially up to {@code 5 * hitsPerPage} hits are collected. When a
     * requested page lies beyond the collected hits the user is asked whether
     * all matching documents should be collected. In non-interactive mode only
     * the first page is printed. In interactive mode the user can move to the
     * previous/next page, jump to a page number, or quit; an empty line or end
     * of input also ends paging.
     *
     * @param in          reader the navigation answers are read from
     * @param searcher    searcher used to run the query and load documents
     * @param query       query to execute
     * @param hitsPerPage number of hits printed per page
     * @param interactive whether to prompt for page navigation after each page
     * @throws IOException if searching the index or reading the input fails
     */
    public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
            boolean interactive) throws IOException {
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);
        while (true) {
            if (end > hits.length) {
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                        + " total matching documents collected.");
                System.out.println("Collect more (y/n) ?");
                String answer = in.readLine();
                // End of input, an empty line or "n..." all mean: do not collect more.
                if ((answer == null) || (answer.length() == 0) || (answer.charAt(0) == 'n')) {
                    return;
                }

                hits = searcher.search(query, numTotalHits).scoreDocs;
            }

            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; ++i) {
                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                //String modifyDate=doc.get("modified");
                System.out.println(
                        "doc=" + hits[i].doc + " score=" + hits[i].score + "------" + (i + 1) + ". " + path);
            }

            if (!interactive || (end == 0)) {
                return;
            }

            int nextStart = promptForNextStart(in, start, hitsPerPage, numTotalHits);
            if (nextStart == QUIT) {
                return;
            }
            start = nextStart;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }

    /**
     * Asks the user where to go next and returns the offset of the first hit
     * of the chosen page, or {@link #QUIT} when paging should stop.
     *
     * <p>Input that is neither a command nor a valid page number is rejected
     * with "No such page" and the prompt is shown again.
     */
    private static int promptForNextStart(BufferedReader in, int start, int hitsPerPage, int numTotalHits)
            throws IOException {
        while (true) {
            System.out.print("Press ");
            if (start - hitsPerPage >= 0) {
                System.out.print("(p)revious page, ");
            }
            if (start + hitsPerPage < numTotalHits) {
                System.out.print("(n)ext page, ");
            }
            System.out.println("(q)uit or enter number to jump to a page.");

            String line = in.readLine();
            // readLine() returns null at end of input; treat that like quitting.
            if ((line == null) || (line.length() == 0) || (line.charAt(0) == 'q')) {
                return QUIT;
            }

            char command = line.charAt(0);
            if (command == 'p') {
                return Math.max(0, start - hitsPerPage);
            }
            if (command == 'n') {
                // Stay on the current page when there is no next page.
                return (start + hitsPerPage < numTotalHits) ? start + hitsPerPage : start;
            }

            try {
                int page = Integer.parseInt(line.trim());
                // long arithmetic so a huge page number cannot overflow into a valid offset
                long offset = ((long) page - 1L) * hitsPerPage;
                if ((page >= 1) && (offset < numTotalHits)) {
                    return (int) offset;
                }
            } catch (NumberFormatException notANumber) {
                // fall through to the "No such page" message and ask again
            }
            System.out.println("No such page");
        }
    }

    /** Returns the value following the option at {@code optionIndex}, exiting with an error if it is missing. */
    private static String optionValue(String[] args, int optionIndex) {
        if (optionIndex + 1 >= args.length) {
            exitWithError("Missing value for option " + args[optionIndex]);
        }
        return args[optionIndex + 1];
    }

    /** Parses the {@code -paging} value, exiting with an error unless it is a positive integer. */
    private static int parseHitsPerPage(String value) {
        int parsed = 0;
        try {
            parsed = Integer.parseInt(value);
        } catch (NumberFormatException notANumber) {
            exitWithError("Invalid value for -paging: " + value);
        }
        if (parsed <= 0) {
            exitWithError("There must be at least 1 hit per page.");
        }
        return parsed;
    }

    /** Prints {@code message} and the usage text to standard error and terminates with exit code 1. */
    private static void exitWithError(String message) {
        System.err.println(message);
        System.err.println(USAGE);
        System.exit(1);
    }
}
/**
 * History:
 *
 *  : cn.fql.blogspider  $
 */