Example usage for org.apache.lucene.search IndexSearcher IndexSearcher

List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher

Introduction

This page collects example usages of the org.apache.lucene.search.IndexSearcher constructor.

Prototype

public IndexSearcher(IndexReaderContext context) 

Source Link

Document

Creates a searcher searching the provided top-level IndexReaderContext.

Usage

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Creates a new NaiveBayes classifier.
 *
 * <p>The constructor only stores its arguments and wraps the reader in a new {@link IndexSearcher};
 * no search is executed here.
 *
 * @param leafReader     the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param classFieldName the name of the field used as the output for the classifier NOTE: must not be heavily analyzed
 *                       as the returned class will be a token indexed for this field
 * @param textFieldNames the name of the fields used as the inputs for the classifier, NO boosting supported per field
 */
public SimpleNaiveBayesClassifier(IndexReader leafReader, Analyzer analyzer, Query query, String classFieldName,
        String... textFieldNames) {
    this.leafReader = leafReader;
    // The searcher is bound to the same reader instance that is stored on this object.
    this.indexSearcher = new IndexSearcher(this.leafReader);
    this.textFieldNames = textFieldNames;
    this.classFieldName = classFieldName;
    this.analyzer = analyzer;
    this.query = query;
}

From source file:GUIFrame.java

/**
 * Parses {@code userInput} against the "summary" field, runs the search and copies the top hits
 * into the list models.
 *
 * <p>For every hit the stored "docID" value is appended to {@code DocList} and the stored
 * "summary" value to {@code SumListModel}. Afterwards {@code GetTerms} is invoked for the same
 * input.
 *
 * @param Analyzer  analyzer handed to the query parser and to {@code GetTerms}
 * @param Index     directory holding the index to search
 * @param userInput raw query text; double quotes are removed before parsing
 * @param DocList   list model receiving the "docID" value of each hit
 * @return the number of hits returned by the search (at most 20)
 * @throws ParseException if the query text cannot be parsed
 * @throws IOException    if the index cannot be opened or read
 */
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput,
        DefaultListModel DocList) throws ParseException, IOException {
    // Double quotes are stripped, so the parser never sees a quoted phrase.
    userInput = userInput.replace("\"", "");
    Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput);

    int hitsPerPage = 20; // return 20 top documents
    int hitCount;

    // The reader is closed as soon as the stored fields have been copied out, even on failure.
    try (IndexReader indoReader = DirectoryReader.open(Index)) {
        IndexSearcher indoSearcher = new IndexSearcher(indoReader);
        TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true);
        indoSearcher.search(q, docCollector);
        ScoreDoc[] hits = docCollector.topDocs().scoreDocs;
        hitCount = hits.length;

        // Copy results to list models
        for (ScoreDoc hit : hits) {
            Document d = indoSearcher.doc(hit.doc);
            DocList.addElement(d.get("docID"));
            SumListModel.addElement(d.get("summary"));
        }
    }

    GetTerms(Index, Analyzer, "summary", userInput);

    return hitCount;
}

From source file:SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Recognised options: {@code -index dir} (default "index"), {@code -field f} (default
 * "contents"), {@code -queries file}, {@code -query string}, {@code -repeat n}, {@code -raw} and
 * {@code -paging hitsPerPage} (default 10, must be positive). Queries are read from the queries
 * file, from {@code -query}, or interactively from standard input; each parsed query is handed to
 * {@code doPagingSearch}. The loop stops on end of input, on a blank line, or after a single
 * {@code -query} string has been processed.
 *
 * @param args command-line options as described above
 * @throws Exception if the index cannot be opened, a query cannot be parsed or a search fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    // try-with-resources guarantees the reader is released even when parsing or searching throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in;
        if (queries != null) {
            in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        }
        QueryParser parser = new QueryParser(field, analyzer);
        boolean interactive = queries == null && queryString == null;
        while (true) {
            if (interactive) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // readLine() returns null at end of input.
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, 100);
                }
                Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            doPagingSearch(in, searcher, query, hitsPerPage, raw, interactive);

            // A query given on the command line is executed exactly once.
            if (queryString != null) {
                break;
            }
        }
    }
}

From source file:MyServlet.java

/**
 * Searches the index for the request's "keyword" parameter and forwards the matching courses to
 * {@code /table.jsp}.
 *
 * <p>The keyword is recorded through {@code log.addHistory}, parsed with "Classes" as the default
 * field, and the top 10 hits are converted into {@code Course} objects stored in
 * {@code courseList} and exposed as the request attribute "course". Any exception is caught and
 * its message printed to standard output.
 *
 * @param out      response writer (not used by this method)
 * @param request  request carrying the "keyword" parameter
 * @param response response passed on to the forwarded resource
 */
private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) {
    try {
        //   Text to search
        String querystr = request.getParameter("keyword");

        log.addHistory(querystr);

        //   "Classes" is the default field used when no field is explicitly specified in the query
        Query q = new QueryParser("Classes", analyzer).parse(querystr);

        // Searching code
        int hitsPerPage = 10;

        // The reader must stay open while stored documents are fetched; try-with-resources
        // closes it afterwards on both the success and the failure path.
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            //   Build the result list from the stored fields of each hit
            courseList = new ArrayList();
            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                Course course = new Course(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department"));
                courseList.add(course);
            }
            request.setAttribute("course", courseList);
            RequestDispatcher de = request.getRequestDispatcher("/table.jsp");
            de.forward(request, response);
        }
    } catch (Exception e) {
        // NOTE(review): only the message is reported and the failure is not propagated to the client.
        System.out.println(e.getMessage());
    }
}

From source file:ContentBasedAnalysis.java

License:Apache License

/**
 * Interactive command-line tool that prints the cosine similarity of two indexed files.
 *
 * <p>Recognised options: {@code -index dir} (default "index") and {@code -field f} (default
 * "contents"). The tool repeatedly asks for two file names, resolves each through
 * {@code findDocId}, converts both documents with {@code toTfIdf} and prints the result of
 * {@code cosineSimilarity}. It stops on end of input, on a blank first file name, or when either
 * file is not found in the index.
 *
 * @param args command-line options as described above
 * @throws Exception if the index cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava QueryConvert [-index dir]";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        }
    }

    // Creation of reader and a searcher for the index
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            // Reader to read File Names
            BufferedReader in;
            if (queries != null) {
                in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }

            while (true) {
                System.out.println("Enter filename 1 (or hit <RETURN>): ");
                String f1 = in.readLine();
                // readLine() returns null at end of input.
                if (f1 == null) {
                    break;
                }
                f1 = f1.trim();
                if (f1.length() == 0) {
                    break;
                }
                System.out.println("Enter filename 2: ");
                String f2 = in.readLine();
                if (f2 == null) {
                    break;
                }
                // Trimmed like the first name so stray whitespace does not defeat the lookup.
                f2 = f2.trim();

                int id1 = findDocId(searcher, f1);
                if (id1 < 0) {
                    System.out.println("No file " + f1 + " found in index!");
                    break;
                }
                int id2 = findDocId(searcher, f2);
                // Check and report the SECOND file here (the first one was validated above).
                if (id2 < 0) {
                    System.out.println("No file " + f2 + " found in index!");
                    break;
                }

                // Conversion to TF-IDF format
                TermWeight[] v1 = toTfIdf(reader, id1);
                TermWeight[] v2 = toTfIdf(reader, id2);

                System.out.println("The cosine similarity of the two files is: " + cosineSimilarity(v1, v2));
            }
        } finally {
            searcher.close();
        }
    } finally {
        reader.close();
    }
}

From source file:SearcherTest.java

/**
 * Test fixture setup: opens the index directory, a reader on it, and a searcher over that reader,
 * storing them in the {@code dir}, {@code reader} and {@code is} fields.
 *
 * @throws Exception if the directory or the index cannot be opened
 */
@Before
public void setUp() throws Exception {
    // NOTE(review): absolute Windows path; the test only runs where this index exists.
    final String indexLocation = "E:\\lucene\\idx";
    dir = FSDirectory.open(Paths.get(indexLocation));
    reader = DirectoryReader.open(dir);
    is = new IndexSearcher(reader);
}

From source file:SearchHelpDocs.java

License:Open Source License

/**
 * Simple command-line based search demo over an index of help documents.
 *
 * <p>Expects exactly one argument, the index directory. Each line read from standard input gets
 * "~" appended, is parsed across the content, full-path and file-name fields, and is searched
 * with the sort returned by {@code createSort()}. Hits scoring below 0.09 are listed but not
 * printed in detail. Results are shown {@code HITS_PER_PAGE} at a time with a "more (y/n) ?"
 * prompt. The loop ends on end of input or when the user enters ":quit"; blank lines are ignored.
 *
 * @param args a single element: the index directory
 * @throws Exception if the index cannot be opened, a query cannot be parsed or a search fails
 */
public static void main(String[] args) throws Exception {

    String usage = "Usage: java SearchFiles index-dir";
    if (args.length != 1) {
        System.out.println(usage);
        System.exit(0);
    }
    String index = args[0];
    String field = LUC_KEY_CONTENT;
    String queries = null;
    int repeat = 0;
    String normsField = null;

    System.out.println("INFO: index-directory=" + index);
    IndexReader reader = IndexReader.open(index);
    try {
        if (normsField != null) {
            reader = new OneNormsReader(reader, normsField);
        }

        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));

        String[] fields = { LUC_KEY_CONTENT, LUC_KEY_FULL_PATH, LUC_KEY_FILE_NAME };
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);

        printHelpInformation();
        while (true) {
            // prompt the user
            System.out.print(CMDLINE_PREFIX);
            System.out.flush();
            String line = in.readLine();
            // readLine() returns null at end of input.
            if (line == null) {
                break;
            }
            if (line.trim().length() == 0) {
                continue;
            }
            // Exit gracefully.
            if (line.trim().equalsIgnoreCase(":quit")) {
                System.out.println("INFO: quit successful");
                break;
            }

            // Modify for fuzzy query (E.g. ~0.58), also use wildcard postfix (*)
            line = line + "~";
            Query query = parser.parse(line);
            System.out.println(CMDLINE_PREFIX + "Searching for: [" + line + "] query=" + query.toString(field));
            System.out.flush();
            // Search and also add the sort element
            Hits hits = searcher.search(query, createSort());
            if (repeat > 0) {
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    hits = searcher.search(query);
                }
                Date end = new Date();
                System.out.println(CMDLINE_PREFIX + "Time: " + (end.getTime() - start.getTime()) + "ms");
            }
            System.out.println(hits.length() + " total matching documents");
            for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
                int end = Math.min(hits.length(), start + HITS_PER_PAGE);
                for (int i = start; i < end; i++) {

                    System.out.println(CMDLINE_PREFIX + "doc=" + hits.id(i) + " score=" + hits.score(i));

                    // Ignore scores based on a certain threshold
                    if (hits.score(i) < 0.09) {
                        continue;
                    }

                    Document doc = hits.doc(i);
                    String content = doc.get(LUC_KEY_CONTENT);
                    if (content != null) {
                        // Attempt to pretty print help document information
                        System.out.println("\n == Help Document Found; docid=" + hits.id(i));
                        System.out.println("*************************");
                        String fullpath = doc.get(LUC_KEY_FULL_PATH);
                        String filename = doc.get(LUC_KEY_FILE_NAME);
                        String id = doc.get(LUC_KEY_IDENTITY);
                        // Print the stored values themselves; they are not field names to look up.
                        if (filename != null) {
                            System.out.println("   +Filename: " + filename);
                        }
                        if (fullpath != null) {
                            System.out.println("   +Path: " + fullpath);
                        }
                        System.out.println("   id: " + id);
                        System.out.println(" == Content:");
                        System.out.println(prettyPrintContent(content));
                        System.out.println("-------------------------");

                        System.out.println();
                    } else {
                        System.out.println((i + 1) + ". " + "No content for this document");
                    }
                }
                if (queries != null) { // non-interactive
                    break;
                }
                if (hits.length() > end) {
                    System.out.print("more (y/n) ? ");
                    line = in.readLine();
                    // A null answer means end of input; treat it like "n".
                    if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                        break;
                    }
                }
            }
        }
    } finally {
        reader.close();
    }
}

From source file:IrqaQuery.java

License:Apache License

/**
 * Runs {@code question} as an escaped query against the "contents" field and returns the stored
 * documents of the top hits.
 *
 * @param index     path of the index directory
 * @param stoppath  passed to {@code mygetStopwords} to obtain the stop words for the analyzer
 * @param question  free text to search for; query-syntax characters are escaped before parsing
 * @param numResult maximum number of hits to return
 * @param sim       "TFIDF" selects {@code ClassicSimilarity}; any other value, including
 *                  {@code null}, selects {@code BM25Similarity}
 * @return the documents of the hits, in the order returned by the searcher
 * @throws Exception if the index cannot be opened, the query cannot be parsed or the search fails
 */
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    // The documents are fetched before the reader is closed, so the returned list does not need it.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

        // Constant-first comparison keeps a null 'sim' on the BM25 default instead of throwing.
        if ("TFIDF".equals(sim)) {
            searcher.setSimilarity(new ClassicSimilarity());
        } else {
            searcher.setSimilarity(new BM25Similarity());
        }

        String field = "contents";
        QueryParser parser = new QueryParser(field, analyzer);
        Query query = parser.parse(QueryParser.escape(question));

        TopDocs results = searcher.search(query, numResult);
        ScoreDoc[] hits = results.scoreDocs;

        List<Document> docs = new ArrayList<Document>(hits.length);
        for (ScoreDoc hit : hits) {
            docs.add(searcher.doc(hit.doc));
        }
        return docs;
    }
}

From source file:Get_Top_Documents_Based_on_Lucene.java

License:Apache License

/**
 * Batch search: runs the question column of a tab-separated file against the index and writes
 * the results to a text file.
 *
 * <p>Accepts the same options as the Lucene search demo ({@code -index}, {@code -field},
 * {@code -queries}, {@code -query}, {@code -repeat}, {@code -raw}, {@code -paging}). For every
 * line of {@code data/training_set.tsv} the second tab-separated column is taken as the query
 * text, written to the output file followed by a tab, parsed, and handed to
 * {@code doPagingSearch} together with the output writer. The header row (second column equal to
 * "question") and lines with fewer than two columns are skipped.
 *
 * @param args command-line options as described above
 * @throws Exception if the index or the data files cannot be opened, or a search fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index_external_links_v1/";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 20;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    String path_train = "data/training_set.tsv";
    String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt";

    // All three resources are closed in reverse order even if a search throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in;
        if (queries != null) {
            in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        }
        QueryParser parser = new QueryParser(field, analyzer);

        // Input is read as UTF-8 to match the encoding used for the output file.
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path_output), StandardCharsets.UTF_8));
                BufferedReader br = Files.newBufferedReader(Paths.get(path_train), StandardCharsets.UTF_8)) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] lst = line.trim().split("\t");
                // Blank or malformed lines have no question column.
                if (lst.length < 2) {
                    continue;
                }
                String query_s = lst[1];
                // Skip the header row; equals() is required because == compares references.
                if ("question".equals(query_s)) {
                    continue;
                }
                System.out.println("query_s: " + query_s);
                writer.write(query_s + "\t");
                try {
                    Query query = parser.parse(query_s);
                    System.out.println("Searching for: " + query.toString(field));
                    doPagingSearch(in, writer, searcher, query, hitsPerPage, raw,
                            queries == null && queryString == null);
                } catch (org.apache.lucene.queryparser.classic.ParseException e) {
                    // An unparsable question is reported and skipped; the batch continues.
                    System.err.println("Skipping unparsable query: " + query_s);
                }
            }
        }
    }
}

From source file:QueryLuceneIndex.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Recognised options: {@code -index dir}, {@code -field f} (default "contents"),
 * {@code -queries file}, {@code -query string}, {@code -repeat n}, {@code -raw} and
 * {@code -paging hitsPerPage} (default 10, must be positive). Queries are read from the queries
 * file, from {@code -query}, or interactively from standard input; each parsed query is handed to
 * {@code doPagingSearch}. The loop stops on end of input, on a blank line, or after a single
 * {@code -query} string has been processed.
 *
 * @param args command-line options as described above
 * @throws Exception if the index cannot be opened, a query cannot be parsed or a search fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\t[-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    // NOTE(review): machine-specific default; override with -index.
    String index = "/home/chrisschaefer/enwiki-20130604-lucene_bkp";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    // try/finally releases the reader even when parsing or searching throws.
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

        BufferedReader in;
        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer);
        boolean interactive = queries == null && queryString == null;
        while (true) {
            if (interactive) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // readLine() returns null at end of input.
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }
            Query query = parser.parse(line);

            if (repeat > 0) { // repeat & time as benchmark
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, null, 100);
                }
                Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            doPagingSearch(in, searcher, query, hitsPerPage, raw, interactive);

            // A query given on the command line is executed exactly once.
            if (queryString != null) {
                break;
            }
        }
    } finally {
        reader.close();
    }
}