Example usage for org.apache.lucene.search IndexSearcher IndexSearcher

Introduction

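This page collects example usages of the org.apache.lucene.search.IndexSearcher constructor. Note that every snippet below constructs the searcher from an IndexReader rather than from an IndexReaderContext.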

Prototype

public IndexSearcher(IndexReaderContext context)

Source Link

Document

Creates a searcher searching the provided top-level IndexReaderContext.

Usage

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Creates a new NaiveBayes classifier.
 *
 * <p>The constructor stores its arguments and builds an {@link IndexSearcher} on top of the given reader.
 *
 * @param leafReader     the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param classFieldName the name of the field used as the output for the classifier NOTE: must not be heavily analyzed
 *                       as the returned class will be a token indexed for this field
 * @param textFieldNames the name of the fields used as the inputs for the classifier, NO boosting supported per field
 */
public SimpleNaiveBayesClassifier(IndexReader leafReader, Analyzer analyzer, Query query, String classFieldName,
        String... textFieldNames) {
    this.leafReader = leafReader;
    this.indexSearcher = new IndexSearcher(this.leafReader);
    this.textFieldNames = textFieldNames;
    this.classFieldName = classFieldName;
    this.analyzer = analyzer;
    this.query = query;
}

From source file:GUIFrame.java

/**
 * Searches the "summary" field of the given index and copies the top hits into the list models.
 *
 * <p>Double quotes are removed from {@code userInput} before it is parsed. For every hit the stored
 * "docID" value is appended to {@code DocList} and the stored "summary" value to {@code SumListModel}.
 * Afterwards {@code GetTerms} is invoked with the same index, analyzer and (unquoted) input.
 *
 * @param Analyzer  analyzer used to parse the query
 * @param Index     directory holding the index to search
 * @param userInput raw query text typed by the user
 * @param DocList   list model that receives the "docID" value of each hit
 * @return the number of hits returned (at most 20)
 * @throws ParseException if {@code userInput} is not a valid query
 * @throws IOException    if the index cannot be opened or read
 */
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput,
        DefaultListModel DocList) throws ParseException, IOException {
    // The query
    userInput = userInput.replace("\"", "");
    Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput);

    // The search
    int hitsPerPage = 20; // return 20 top documents
    ScoreDoc[] hits;
    // try-with-resources: the reader was previously never closed, leaking index file handles per search
    try (IndexReader indoReader = DirectoryReader.open(Index)) {
        IndexSearcher indoSearcher = new IndexSearcher(indoReader);
        TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true);
        indoSearcher.search(q, docCollector);
        hits = docCollector.topDocs().scoreDocs;

        // Copy results to list models while the reader is still open
        for (ScoreDoc hit : hits) {
            Document d = indoSearcher.doc(hit.doc);
            DocList.addElement(d.get("docID"));
            SumListModel.addElement(d.get("summary"));
        }
    }

    GetTerms(Index, Analyzer, "summary", userInput);

    return hits.length;
}

From source file:SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Parses the options named in the usage string, opens the index directory and then repeatedly reads a
 * query (from the {@code -queries} file, the {@code -query} string, or standard input), parses it and
 * hands it to {@code doPagingSearch}. The loop ends at end of input, on a blank line, or after the single
 * {@code -query} string has been processed.
 *
 * @param args command-line options; see the usage string
 * @throws Exception if the index cannot be opened, a query cannot be parsed, or I/O fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    // try-with-resources so the reader is released even when parsing or searching throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in;
        if (queries != null) {
            in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        }
        QueryParser parser = new QueryParser(field, analyzer);
        // Interactive mode: neither a query file nor a single query string was supplied
        boolean interactive = queries == null && queryString == null;

        while (true) {
            if (interactive) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // null means end of input (String.length() can never be negative, so no further test is needed)
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.isEmpty()) {
                break;
            }

            Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                long start = System.currentTimeMillis();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, 100);
                }
                long end = System.currentTimeMillis();
                System.out.println("Time: " + (end - start) + "ms");
            }

            doPagingSearch(in, searcher, query, hitsPerPage, raw, interactive);

            if (queryString != null) {
                break;
            }
        }
    }
}

From source file:MyServlet.java

/**
 * Runs the request's "keyword" parameter as a query and forwards the matching courses to /table.jsp.
 *
 * <p>The keyword is recorded via {@code log.addHistory}, parsed with "Classes" as the default field, and
 * the top 10 hits are converted to {@code Course} objects that are stored in the request attribute
 * "course". Any exception is caught and only its message is printed to standard output.
 *
 * @param out      response writer (not used by this method)
 * @param request  the incoming request carrying the "keyword" parameter
 * @param response the response passed on to the forwarded page
 */
private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) {
    try {
        //   Text to search
        String querystr = request.getParameter("keyword");

        log.addHistory(querystr);

        //   "Classes" is the default field used when no field is explicitly specified in the query
        Query q = new QueryParser("Classes", analyzer).parse(querystr);

        // Searching code
        int hitsPerPage = 10;
        // try-with-resources: previously the reader leaked whenever searching or forwarding threw
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            //   Collect the results of the search
            courseList = new ArrayList();
            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                Course course = new Course(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department"));
                courseList.add(course);
            }
            request.setAttribute("course", courseList);
            RequestDispatcher de = request.getRequestDispatcher("/table.jsp");
            de.forward(request, response);
            // the reader is closed here, once the documents are no longer accessed
        }
    } catch (Exception e) {
        // NOTE(review): best-effort handling kept from the original; only the message is reported
        System.out.println(e.getMessage());
    }
}

From source file:ContentBasedAnalysis.java

License:Apache License

/**
 * Interactive command-line tool that prints the cosine similarity of two indexed files.
 *
 * <p>Repeatedly prompts for two file names, resolves each to a document id with {@code findDocId},
 * converts both documents with {@code toTfIdf} and prints {@code cosineSimilarity} of the results.
 * The loop ends at end of input, on a blank first file name, or when either file is not found.
 *
 * @param args command-line options ({@code -index dir}, {@code -field f})
 * @throws Exception if the index cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava QueryConvert [-index dir]";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents"; // accepted on the command line but not read below
    String queries = null;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        }
    }

    // Creation of reader and a searcher for the index; try/finally releases both even on failure
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            // Reader to read File Names
            BufferedReader in = null;
            if (queries != null) {
                in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }

            while (true) {
                System.out.println("Enter filename 1 (or hit <RETURN>): ");
                String f1 = in.readLine();
                if (f1 == null) { // end of input
                    break;
                }
                f1 = f1.trim();
                if (f1.length() == 0) {
                    break;
                }
                System.out.println("Enter filename 2: ");
                String f2 = in.readLine();
                if (f2 == null) { // end of input before the second name was given
                    break;
                }
                f2 = f2.trim(); // treat the second name like the first

                int id1 = findDocId(searcher, f1);
                if (id1 < 0) {
                    System.out.println("No file " + f1 + " found in index!");
                    break;
                }
                int id2 = findDocId(searcher, f2);
                // Check the second lookup (the original re-tested id1 and reported f1 here)
                if (id2 < 0) {
                    System.out.println("No file " + f2 + " found in index!");
                    break;
                }

                // Conversion to TF-IDF format
                TermWeight[] v1 = toTfIdf(reader, id1);
                TermWeight[] v2 = toTfIdf(reader, id2);

                System.out.println("The cosine similarity of the two files is: " + cosineSimilarity(v1, v2));
            }
        } finally {
            searcher.close();
        }
    } finally {
        reader.close();
    }
}

From source file:SearcherTest.java

/** Test fixture: opens the index directory, a reader on it, and a searcher over that reader. */
@Before
public void setUp() throws Exception {
    this.dir = FSDirectory.open(Paths.get("E:\\lucene\\idx"));
    this.reader = DirectoryReader.open(this.dir);
    this.is = new IndexSearcher(this.reader);
}

From source file:SearchHelpDocs.java

License:Open Source License

/**
 * Interactive command-line search over an index of help documents.
 *
 * <p>Reads query lines from standard input, appends "~" to each to make it a fuzzy query, searches the
 * content, full-path and file-name fields, and prints every hit whose score is at least 0.09. Results
 * are shown {@code HITS_PER_PAGE} at a time. Blank lines are ignored and ":quit" (any case) exits.
 *
 * @param args exactly one argument: the index directory
 * @throws Exception if the index cannot be opened, a query cannot be parsed, or I/O fails
 */
public static void main(String[] args) throws Exception {

    String usage = "Usage: java SearchFiles index-dir";
    if (args.length != 1) {
        System.out.println(usage);
        System.exit(0);
    }
    String index = args[0];
    String field = LUC_KEY_CONTENT;
    String queries = null;
    int repeat = 0;
    String normsField = null;

    System.out.println("INFO: index-directory=" + index);
    IndexReader reader = IndexReader.open(index);
    // try/finally so the reader is released even when parsing or searching throws
    try {
        if (normsField != null) {
            reader = new OneNormsReader(reader, normsField);
        }

        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));

        String[] fields = { LUC_KEY_CONTENT, LUC_KEY_FULL_PATH, LUC_KEY_FILE_NAME };
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);

        printHelpInformation();
        while (true) {
            // prompt the user
            System.out.print(CMDLINE_PREFIX);
            System.out.flush();
            String line = in.readLine();
            if (line == null) { // end of input
                break;
            }
            if (line.trim().length() == 0) {
                continue;
            }
            // Exit gracefully.
            if (line.trim().equalsIgnoreCase(":quit")) {
                System.out.println("INFO: quit successful");
                break;
            }

            // Turn the input into a fuzzy query by appending "~"
            line = line + "~";
            // Parse once (the original parsed the same line twice and discarded the first result)
            Query query = parser.parse(line);
            System.out.println(CMDLINE_PREFIX + "Searching for: [" + line + "] query=" + query.toString(field));
            System.out.flush();
            // Search and also add the sort element
            Hits hits = searcher.search(query, createSort());
            if (repeat > 0) {
                long start = System.currentTimeMillis();
                for (int i = 0; i < repeat; i++) {
                    hits = searcher.search(query);
                }
                long end = System.currentTimeMillis();
                System.out.println(CMDLINE_PREFIX + "Time: " + (end - start) + "ms");
            }
            System.out.println(hits.length() + " total matching documents");
            for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
                int end = Math.min(hits.length(), start + HITS_PER_PAGE);
                for (int i = start; i < end; i++) {

                    System.out.println(CMDLINE_PREFIX + "doc=" + hits.id(i) + " score=" + hits.score(i));

                    // Ignore scores based on a certain threshold
                    if (hits.score(i) < 0.09) {
                        continue;
                    }

                    Document doc = hits.doc(i);
                    String content = doc.get(LUC_KEY_CONTENT);
                    if (content != null) {
                        // Attempt to pretty print help document information
                        System.out.println("\n == Help Document Found; docid=" + hits.id(i));
                        System.out.println("*************************");
                        String fullpath = doc.get(LUC_KEY_FULL_PATH);
                        String filename = doc.get(LUC_KEY_FILE_NAME);
                        String id = doc.get(LUC_KEY_IDENTITY);
                        // Print the stored values themselves; the original used them as field names
                        // (doc.get(filename) / doc.get(fullpath)), which looks up a non-existent field.
                        if (filename != null) {
                            System.out.println("   +Filename: " + filename);
                        }
                        if (fullpath != null) {
                            System.out.println("   +Path: " + fullpath);
                        }
                        System.out.println("   id: " + id);
                        System.out.println(" == Content:");
                        System.out.println(prettyPrintContent(content));
                        System.out.println("-------------------------");

                        System.out.println();
                    } else {
                        System.out.println((i + 1) + ". " + "No content for this document");
                    }
                }
                if (queries != null) { // non-interactive
                    break;
                }
                if (hits.length() > end) {
                    System.out.print("more (y/n) ? ");
                    line = in.readLine();
                    // null (end of input) is treated like "n" instead of throwing NullPointerException
                    if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                        break;
                    }
                }
            }
        }
    } finally {
        reader.close();
    }
}

From source file:IrqaQuery.java

License:Apache License

/**
 * Searches the "contents" field of an index for a free-text question and returns the top documents.
 *
 * <p>The question is escaped before parsing, so query-syntax characters in it are matched literally.
 * The index reader is closed before returning; the returned documents were already loaded.
 *
 * @param index     path of the index directory
 * @param stoppath  location passed to {@code mygetStopwords} to build the analyzer's stop set
 * @param question  the question text to search for
 * @param numResult maximum number of documents to return
 * @param sim       "TFIDF" selects {@code ClassicSimilarity}; any other value selects {@code BM25Similarity}
 * @return the matching documents in rank order, at most {@code numResult} of them
 * @throws Exception if the index cannot be opened, the query cannot be parsed, or I/O fails
 */
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    // try-with-resources: the reader was previously never closed, leaking file handles on every call
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

        // BM25 is both the explicit "BM25" choice and the fallback, so one branch covers both
        if ("TFIDF".equals(sim)) {
            searcher.setSimilarity(new ClassicSimilarity());
        } else {
            searcher.setSimilarity(new BM25Similarity());
        }

        String field = "contents";
        QueryParser parser = new QueryParser(field, analyzer);
        Query query = parser.parse(QueryParser.escape(question));

        TopDocs results = searcher.search(query, numResult);
        ScoreDoc[] hits = results.scoreDocs;

        // scoreDocs already holds min(total hits, numResult) entries, so iterate it directly
        List<Document> docs = new ArrayList<Document>(hits.length);
        for (ScoreDoc hit : hits) {
            docs.add(searcher.doc(hit.doc));
        }
        return docs;
    }
}

From source file:Get_Top_Documents_Based_on_Lucene.java

License:Apache License

/**
 * Batch search: runs every question of a tab-separated training file against the index.
 *
 * <p>For each line of {@code data/training_set.tsv} the second tab-separated column is taken as the
 * query text, written to the output file followed by a tab, parsed, and handed to
 * {@code doPagingSearch} together with the output writer. The header row (second column "question"),
 * lines with fewer than two columns, and queries that fail to parse are skipped.
 *
 * @param args command-line options; see the usage string
 * @throws Exception if the index or either data file cannot be opened, or I/O fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    // Previously used indexes: "index", "index_wiki_2"
    String index = "index_external_links_v1/";
    String field = "contents";
    String queries = null;
    int repeat = 0; // still accepted on the command line, but no longer read by the batch loop
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 20;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    // Input questions and output file for this run
    String path_train = "data/training_set.tsv";
    String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt";

    // try-with-resources so the index reader is released even when a search fails
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in;
        if (queries != null) {
            in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        }
        QueryParser parser = new QueryParser(field, analyzer);

        // Both files are handled as UTF-8 and closed (flushing the writer) on every exit path
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path_output), StandardCharsets.UTF_8));
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(path_train), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] lst = line.trim().split("\t");
                if (lst.length < 2) {
                    continue; // blank or malformed line: no question column
                }
                String query_s = lst[1];
                // equals(), not ==: reference comparison never matched, so the header row was searched too
                if ("question".equals(query_s)) {
                    continue;
                }
                System.out.println("query_s: " + query_s);
                writer.write(query_s + "\t");
                try {
                    Query query = parser.parse(query_s);
                    System.out.println("Searching for: " + query.toString(field));
                    doPagingSearch(in, writer, searcher, query, hitsPerPage, raw,
                            queries == null && queryString == null);
                } catch (org.apache.lucene.queryparser.classic.ParseException e) {
                    // Unparseable question: skip it and move on to the next line
                    continue;
                }
            }
        }
    }
}

From source file:QueryLuceneIndex.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Parses the options named in the usage string, opens the index directory and then repeatedly reads a
 * query (from the {@code -queries} file, the {@code -query} string, or standard input), parses it and
 * hands it to {@code doPagingSearch}. The loop ends at end of input, on a blank line, or after the single
 * {@code -query} string has been processed.
 *
 * @param args command-line options; see the usage string
 * @throws Exception if the index cannot be opened, a query cannot be parsed, or I/O fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\t[-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "/home/chrisschaefer/enwiki-20130604-lucene_bkp";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    // try/finally so the reader is released even when parsing or searching throws
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

        BufferedReader in = null;
        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer);
        // Interactive mode: neither a query file nor a single query string was supplied
        boolean interactive = queries == null && queryString == null;

        while (true) {
            if (interactive) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // null means end of input (String.length() can never be negative, so no further test is needed)
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            Query query = parser.parse(line);

            if (repeat > 0) { // repeat & time as benchmark
                long start = System.currentTimeMillis();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, null, 100);
                }
                long end = System.currentTimeMillis();
                System.out.println("Time: " + (end - start) + "ms");
            }

            doPagingSearch(in, searcher, query, hitsPerPage, raw, interactive);

            if (queryString != null) {
                break;
            }
        }
    } finally {
        reader.close();
    }
}