Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

This page collects example usages of the doc(int) method of org.apache.lucene.search.IndexSearcher.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:ikanalyzer.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * Indexes a single document into an in-memory directory using {@code IKAnalyzer},
 * searches it, and prints every returned hit loaded through
 * {@code IndexSearcher.doc(int)}.
 * <p>
 * All failures are reported with {@code printStackTrace()}; the writer, reader and
 * directory are closed in the {@code finally} block.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    // Name of the document field that holds the analyzed text
    String fieldName = "text";
    // Content of that field for the single indexed document
    String text = "IK Analyzer???????";

    // One analyzer instance is shared by indexing and query parsing
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    try {
        // The index lives in memory only
        directory = new RAMDirectory();

        // Configure and open the index writer
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // Write one document with a stored ID and the stored, analyzed text
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();
        // Closed successfully: clear the reference so the finally block does not close it again
        iwriter = null;

        // ---- search phase ----
        ireader = DirectoryReader.open(directory);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // Build the Query with a QueryParser; all terms are required (AND)
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Fetch at most the 5 best hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);

        // Iterate over the hits actually returned. totalHits counts every match and
        // can exceed scoreDocs.length (capped at 5), so it must not be the loop bound.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document targetDoc = isearcher.doc(scoreDoc.doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // Only non-null here when indexing failed before the writer was closed
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:in.student.project.queryexpansion.SearchFilesLDA.java

License:Apache License

/**
 * Generates necessary output - in this case this output is used as input to matlab
 * @param hits/*w w  w.  j  a v  a 2s.c  o  m*/
 * @param terms
 * @param query_num - tag of the query
 * @param writer
 * @param termCount
 * @param outCount
 * @param idxReader 
 * @param similarity 
 * @param searcher 
 * @throws IOException 
 */
private static void generateOutput(TopDocs hits, Vector<TermQuery> terms, String query_num,
        BufferedWriter writer, int termCount, int outCount, IndexSearcher searcher, TFIDFSimilarity similarity,
        IndexReader idxReader) throws IOException {
    logger.finer("terms.size(): " + terms.size());
    // Generate Output
    // For each doc
    for (int i = 0; ((i < hits.scoreDocs.length) && (i < outCount)); i++) {
        Document doc = searcher.doc(hits.scoreDocs[i].doc);
        String docno = ((Field) doc.getField("DOCNO")).stringValue();
        int docId = hits.scoreDocs[i].doc;
        float coord = Utils.coord(terms, doc, docId, similarity, idxReader);

        writer.write(query_num + " " + "Q0" + " " + docno + " " + (i + 1) + " " + hits.scoreDocs[i].score + " "
                + coord);
        // For each term output normalized: tf, idf, boostFactor
        for (int j = 0; j < termCount; j++) {
            if (j < terms.size()) {
                TermQuery termQuery = terms.elementAt(j);
                Term term = termQuery.getTerm();
                String termStr = term.text();
                float tf = Utils.getTFNorm(termStr, doc, docId, similarity, idxReader, true);
                float idf = Utils.getIDFNorm(termStr, terms, idxReader, similarity, true);
                float boost = Utils.getBoostNorm(termQuery, terms);
                writer.write(" " + tf + " " + idf + " " + boost + " ");
            }
            // If not enough terms pad with 0's
            else {
                writer.write(" 0 0 0 ");
            }
        }
        writer.write("\n");
    }
}

From source file:in.student.project.search.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 * <p>
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * <p>
 * End of input on {@code in} is treated like an empty answer (decline / quit), and a
 * page entry that is not a positive number is answered with "No such page".
 *
 * @param in          source of the user's answers when prompting
 * @param searcher    searcher used to run the query and to load the hit documents
 * @param query       query to execute
 * @param hitsPerPage number of hits shown per page
 * @param raw         when {@code true}, only doc id and score are printed for each hit
 * @param interactive when {@code false}, the method returns after the first page
 * @throws IOException if searching, loading a document, or reading from {@code in} fails
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // readLine() returns null at end of input; handle it like an empty answer
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                System.out.println("   Title: " + doc.get("title"));
                System.out.println("   " + doc.get("summary"));

            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                // null means the input stream ended: quit instead of failing
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException e) {
                        // Not a command and not a number: ask again
                        System.out.println("No such page");
                        continue;
                    }
                    // Pages are 1-based; page <= 0 would produce a negative start index
                    if (page >= 1 && (page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:index.IndexUtils.java

/**
 * Parses {@code key} as a query on the {@code name} field, collects up to 800 hits
 * and prepares a {@code Highlighter} that wraps matches in red {@code <font>} tags.
 * <p>
 * NOTE(review): the statements that turned each hit into a highlighted result and
 * added it to the list are disabled in this version, so the list returned on success
 * is always empty and the per-hit token stream is created but never consumed.
 *
 * @param indexSearcher searcher the query is executed against
 * @param key           query string handed to the {@code QueryParser}
 * @return an empty list when the search completes; {@code null} when parsing the
 *         query or accessing the index fails (the exception is printed)
 * @throws ClassNotFoundException declared by the signature; nothing in the visible
 *         body throws it
 */
public static List highlight(IndexSearcher indexSearcher, String key) throws ClassNotFoundException {
    try {
        Query query = new QueryParser("name", new StandardAnalyzer()).parse(key);

        TopDocCollector collector = new TopDocCollector(800);
        indexSearcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Matches are wrapped in a red <font> element; fragments are limited to 200
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<font color='red'>", "</font>"), new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(200));

        List results = new ArrayList();
        for (ScoreDoc hit : hits) {
            Document doc = indexSearcher.doc(hit.doc);
            // Input for the (currently disabled) highlighter.getBestFragment call
            TokenStream tokenStream = new StandardAnalyzer().tokenStream("name",
                    new StringReader(doc.get("name")));
        }
        return results;
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:index.reader.LuceneQuery.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 * <p>
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * <p>
 * End of input on {@code in} is treated like an empty answer (decline / quit), and a
 * page entry that is not a positive number is answered with "No such page".
 *
 * @param in          source of the user's answers when prompting
 * @param searcher    searcher used to run the query and to load the hit documents
 * @param query       query to execute
 * @param hitsPerPage number of hits shown per page
 * @param raw         when {@code true}, only doc id and score are printed for each hit
 * @param interactive when {@code false}, the method returns after the first page
 * @throws IOException if searching, loading a document, or reading from {@code in} fails
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // readLine() returns null at end of input; handle it like an empty answer
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("resource_uri");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                // null means the input stream ended: quit instead of failing
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException e) {
                        // Not a command and not a number: ask again
                        System.out.println("No such page");
                        continue;
                    }
                    // Pages are 1-based; page <= 0 would produce a negative start index
                    if (page >= 1 && (page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:indexer.Retriever.java

/**
 * Prints term statistics for a result list to standard output.
 * <p>
 * First an "IDF" section is printed with one {@code getIDF} value per
 * space-separated word of {@code words}. Then, for every hit, a numbered line with
 * the document's title field and score is printed, followed by one {@code getTF}
 * value per word for that document.
 *
 * @param reader   index reader handed to {@code getIDF} and {@code getTF}
 * @param searcher used to load the stored document of each hit
 * @param words    query words separated by single spaces
 * @param results  hits to report
 * @throws IOException if a document cannot be loaded or a statistic cannot be read
 */
private void calculateIFIDF(IndexReader reader, IndexSearcher searcher, String words, TopDocs results)
        throws IOException {
    ScoreDoc[] ranked = results.scoreDocs;

    System.out.println("\nIDF");
    final String[] tokens = words.split(" ");
    for (String token : tokens) {
        System.out.println(getIDF(reader, token));
    }
    System.out.println();

    for (int idx = 0; idx < ranked.length; idx++) {
        ScoreDoc hit = ranked[idx];
        Document doc = searcher.doc(hit.doc);
        // Hits are numbered starting at 1
        System.out.println((idx + 1) + ". " + doc.get(Constants.titleField) + " (score: " + hit.score + ")");
        for (String token : tokens) {
            System.out.println(getTF(reader, hit.doc, token));
        }
    }
}

From source file:indexer.SearchFiles.java

License:Apache License

/**
 * Runs {@code query} and returns the stored {@code path} value of every matching
 * document.
 * <p>
 * The first search collects enough hits for 5 pages of {@code hitsPerPage}. If more
 * documents match than were collected, the query is executed a second time asking for
 * all of them. Despite the name, no paging or prompting takes place.
 * <p>
 * Console output: the total hit count; for each hit either its title (when a
 * {@code title} value is stored) or a "No path for this document" line; in raw mode
 * only doc id and score.
 *
 * @param searcher    searcher used to run the query and to load the hit documents
 * @param query       query to execute
 * @param hitsPerPage page size; only used to size the first search
 * @param raw         when {@code true}, doc id and score are printed for each hit and
 *                    no paths are collected
 * @param interactive not used by this implementation
 * @return the {@code path} values of all hits that have one, in hit order; empty when
 *         nothing matched
 * @throws IOException if searching or loading a document fails
 */
public static String[] doPagingSearch(IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
        boolean interactive) throws IOException {

    ArrayList<String> docPaths = new ArrayList<String>();
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    // More documents match than were collected: search again for all of them
    if (numTotalHits > hits.length) {
        hits = searcher.search(query, numTotalHits).scoreDocs;
    }

    // Bound the loop by the hits actually held, not by the reported total, so a second
    // search that returns fewer hits cannot cause an out-of-bounds access
    for (int i = 0; i < hits.length; i++) {
        if (raw) { // output raw format
            System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
            continue;
        }

        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path != null) {
            docPaths.add(path); // candidate document path
            String title = doc.get("title");
            if (title != null) {
                System.out.println("   Title: " + title);
            }
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");
        }
    }

    return docPaths.toArray(new String[docPaths.size()]);
}

From source file:info.extensiblecatalog.OAIToolkit.db.LuceneSearcher.java

License:Open Source License

/**
 * Determines the latest {@code modification_date} value in the index in two passes.
 * <p>
 * Pass one reads the first {@code modification_date} value of the document with the
 * highest doc id. Pass two runs a range query from that value up to the current UTC
 * time, sorted by {@code modification_date} in reverse order, and takes the top hit's
 * value if there is one.
 * <p>
 * Any exception is logged and the value found so far is returned.
 *
 * @return the latest datestamp found, or {@code null} if none could be determined
 */
public String getLatestDatestamp() {
    String newest = null;
    try {
        IndexSearcher indexSearcher = getSearcher();

        // Pass one: start from the document with the highest doc id.
        // Original author's note: the field values are stored in order with the most
        // recent first, so index 0 is taken without sorting.
        Document candidate = indexSearcher.doc(indexSearcher.maxDoc() - 1);
        Field[] dateFields = candidate.getFields("modification_date");
        newest = dateFields[0].stringValue();
        prglog.info("getLatestDatestamp:" + newest);

        // Pass two: the highest doc id is not necessarily the most recent record, so
        // search the range from the pass-one value up to now
        Sort newestFirst = new Sort(new SortField("modification_date", SortField.STRING, true));
        String rangeQuery = "+modification_date:[\"" + newest + "\" TO \""
                + TextUtil.utcToMysqlTimestamp(TextUtil.nowInUTC()) + "\"]";
        prglog.info("queryString for latest datestamp:" + rangeQuery);
        TopDocs topHits = search(rangeQuery, newestFirst, 1);

        if (topHits.scoreDocs.length > 0) {
            candidate = indexSearcher.doc(topHits.scoreDocs[0].doc);
            dateFields = candidate.getFields("modification_date");
            newest = dateFields[0].stringValue();
        }
        prglog.info("getLatestDatestamp pass two:" + newest);

    } catch (Exception e) {
        prglog.error("[PRG] " + e);
    }
    return newest;
}

From source file:info.johtani.jjug.lucene.sample.SearcherSample.java

License:Apache License

/**
 * Searches the index in {@code ./indexdir} for a fixed keyword in the
 * {@code content} field and prints the hit count followed by the stored
 * {@code content} of up to 10 hits.
 * <p>
 * The directory and the reader are opened in a try-with-resources statement so both
 * are closed on every path; failures, including failures while closing, are printed.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {

    String indexDirectory = "./indexdir";
    String keyword = "johtani";

    // Resources are closed in reverse order: reader first, then directory
    try (Directory dir = FSDirectory.open(new File(indexDirectory));
            IndexReader reader = DirectoryReader.open(dir)) {

        IndexSearcher searcher = new IndexSearcher(reader);

        // Analyzer used for parsing the query (a JapaneseAnalyzer could be substituted)
        Analyzer analyzer = new StandardAnalyzer();

        // Parse the keyword against the "content" field
        QueryParser parser = new QueryParser("content", analyzer);
        Query query = parser.parse(keyword);

        // Retrieve at most 10 hits
        TopDocs hits = searcher.search(query, 10);

        System.out.println("Found " + hits.totalHits + " document(s)");

        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            System.out.println("--- " + scoreDoc.doc + " ---");
            // Load the stored document by its doc id and print its content
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("content"));
            System.out.println("---------");
        }

    } catch (IOException | ParseException e) {
        e.printStackTrace();
    }

}

From source file:InformationRetrieval.Search.java

License:Apache License

/**
 * Runs {@code query} and builds a {@code Store} entry for each hit on the first
 * result page by reading the file named in the hit's stored {@code path} value.
 * <p>
 * This is a non-interactive variant of the paging demo: the answers to the
 * "Collect more" and page-navigation prompts are fixed to "no" and "quit", so only
 * the first page ({@code hitsPerPage} hits) is ever processed. The prompts are still
 * printed under the same conditions as before.
 *
 * @param in          not used by this implementation
 * @param searcher    searcher used to run the query and to load the hit documents
 * @param query       query to execute
 * @param hitsPerPage number of hits processed; the first search asks for 5 times as many
 * @param raw         when {@code true}, no entries are built
 * @param interactive when {@code true}, the navigation prompt is printed once
 * @return one {@code Store} per processed hit that has a {@code path}; possibly empty
 * @throws IOException if searching, loading a document, or reading a hit's file fails
 */
public static ArrayList<Store> doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
        int hitsPerPage, boolean raw, boolean interactive) throws IOException {

    ArrayList<Store> list1 = new ArrayList<Store>();
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    if (end > hits.length) {
        System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                + " total matching documents collected.");
        System.out.println("Collect more (y/n) ?");
        // The answer is fixed to "n": stop without collecting more
        return list1;
    }

    end = Math.min(hits.length, start + hitsPerPage);

    // Raw mode produces no entries (its output statements are disabled)
    if (!raw) {
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                list1.add(readStore(path));
            }
        }
    }

    if (interactive && end != 0 && numTotalHits >= end) {
        System.out.print("Press ");
        if (start - hitsPerPage >= 0) {
            System.out.print("(p)revious page, ");
        }
        if (start + hitsPerPage < numTotalHits) {
            System.out.print("(n)ext page, ");
        }
        System.out.println("(q)uit or enter number to jump to a page.");
        // The answer is fixed to "q": fall through and return
    }
    return list1;
}

/**
 * Builds a {@code Store} from the file at {@code path}: line 1 and line 2 are passed
 * on as read, and lines 4 and 6 (trimmed, "NA" when missing) form the snippet together
 * with a constant third "NA" part. The reader is always closed.
 */
private static Store readStore(String path) throws IOException {
    BufferedReader fileReader = new BufferedReader(new FileReader(path));
    try {
        String line = fileReader.readLine();
        String title1 = fileReader.readLine();
        String s1 = readSnippetLine(fileReader);
        String s2 = readSnippetLine(fileReader);
        String s3 = "NA";

        String small = s1 + " " + s2 + " " + s3;
        return new Store(title1, line, small);
    } finally {
        fileReader.close();
    }
}

/**
 * Skips one line and returns the following line trimmed, or "NA" when either line is
 * missing because the end of the file was reached.
 */
private static String readSnippetLine(BufferedReader reader) throws IOException {
    if (reader.readLine() == null) {
        return "NA";
    }
    String next = reader.readLine();
    return next != null ? next.trim() : "NA";
}