Usage examples for org.apache.lucene.search.IndexSearcher.doc

A list of usage examples for the doc method of org.apache.lucene.search.IndexSearcher

Introduction

This page collects usage examples for the doc method of org.apache.lucene.search.IndexSearcher.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:com.netcrest.pado.index.provider.lucene.LuceneSearchRAMDirectory.java

License:Open Source License

/**
 * Runs the given query against the index held in {@code dir} and returns the
 * "IdentityKey" field value of every matching document.
 * <p>
 * A binary field value is passed through {@code BlobHelper.deserializeBlob}; a value
 * that fails to deserialize is logged and skipped. A non-binary value is returned in
 * its string form. Documents without an "IdentityKey" field are skipped.
 *
 * @param queryString query text handed to the class-level {@code parser}
 * @param dir         in-memory index to search
 * @return the identity keys of all hits, in hit order; never {@code null}
 * @throws RuntimeException if the index cannot be opened or read, or the query cannot
 *                          be parsed
 */
private List<Object> getIdentityKeyList(String queryString, RAMDirectory dir) {
    List<Object> list = new ArrayList<Object>();
    IndexReader reader;
    try {
        reader = IndexReader.open(dir);
    } catch (IOException e1) {
        // CorruptIndexException is a subclass of IOException, so one handler covers both.
        e1.printStackTrace();
        throw new RuntimeException(e1);
    }

    // Everything below runs under try/finally so the reader is released on every path,
    // including the exceptional ones.
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        Query query;
        try {
            query = parser.parse(queryString, "IdentityKey");
        } catch (Exception ex) {
            ex.printStackTrace();
            throw new RuntimeException(ex);
        }

        TopDocs results;
        try {
            results = searcher.search(query, null, Integer.MAX_VALUE);
        } catch (IOException e1) {
            e1.printStackTrace();
            throw new RuntimeException(e1);
        }

        for (ScoreDoc hit : results.scoreDocs) {
            Document doc;
            try {
                doc = searcher.doc(hit.doc);
            } catch (IOException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }

            IndexableField field = doc.getField("IdentityKey");
            if (field == null) {
                continue;
            }

            BytesRef br = field.binaryValue();
            if (br != null) {
                // A BytesRef is a slice (offset + length) of a possibly larger array, so
                // copy out only the valid region instead of using br.bytes wholesale.
                byte[] blob = new byte[br.length];
                System.arraycopy(br.bytes, br.offset, blob, 0, br.length);
                try {
                    list.add(BlobHelper.deserializeBlob(blob));
                } catch (Exception ex) {
                    Logger.warning("Identity key deserialization error", ex);
                }
            } else {
                list.add(field.stringValue());
            }
        }
    } finally {
        try {
            reader.close();
        } catch (IOException e) {
            // A failed close must not mask the search outcome.
            Logger.warning("Failed to close index reader", e);
        }
    }
    return list;
}

From source file:com.netcrest.pado.index.provider.lucene.TopNLuceneSearch.java

License:Open Source License

/**
 * Runs the given query against the index in {@code dir} and returns the identity keys
 * of all matching documents.
 * <p>
 * Hyphens in {@code queryString} are backslash-escaped before parsing, and "__doc" is
 * passed to the parser as the default field. For each hit the temporal key is obtained
 * via {@code LuceneField.getTemporalKey}; hits without a temporal key are skipped.
 *
 * @param queryString query text
 * @param dir         index directory to search
 * @return the set of identity keys of all hits; never {@code null}
 * @throws PadoException    if the query cannot be parsed (carries only the message of
 *                          the underlying exception)
 * @throws RuntimeException if the index cannot be opened or read
 */
protected Set<Object> getIdentityKeySet(String queryString, Directory dir) {
    Set<Object> identityKeySet = new HashSet<Object>();
    DirectoryReader reader;
    try {
        reader = DirectoryReader.open(dir);
    } catch (IOException e1) {
        // CorruptIndexException is a subclass of IOException, so one handler covers both.
        e1.printStackTrace();
        throw new RuntimeException(e1);
    }

    // try/finally guarantees the reader is closed even when parsing or searching fails.
    try {
        Query query;
        try {
            StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION));
            query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc");
        } catch (Exception ex) {
            // Lucene bug. Unable to serialize exception. Log the message and
            // throw a new exception with the string message.
            ex.printStackTrace();
            throw new PadoException(ex.getMessage());
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            TopDocs results = searcher.search(query, null, Integer.MAX_VALUE);
            for (ScoreDoc hit : results.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                LuceneField luceneField = new LuceneField();
                ITemporalKey temporalKey = luceneField.getTemporalKey(doc);
                if (temporalKey != null) {
                    identityKeySet.add(temporalKey.getIdentityKey());
                }
            }
        } catch (IOException e1) {
            e1.printStackTrace();
            throw new RuntimeException(e1);
        }
    } finally {
        try {
            reader.close();
        } catch (IOException e) {
            // A failed close must not mask the search outcome.
            e.printStackTrace();
        }
    }
    return identityKeySet;
}

From source file:com.nuvolect.deepdive.lucene.Search.java

/**
 * Return results for a search along a specific path.  If the path is changed or new
 * create an index./*from  w w w  . j  av  a  2 s .c o m*/
 * @param searchQuery
 * @param searchPath
 * @return
 */
public static JSONObject search(String searchQuery, String volumeId, String searchPath) {

    JSONObject result = new JSONObject();
    JSONArray jsonArray = new JSONArray();
    Context ctx = App.getContext();

    DirectoryReader ireader = null;
    ScoreDoc[] scoreDocs = null;
    String error = "";

    preSearch(volumeId, searchPath);
    try {
        ireader = DirectoryReader.open(m_directory);
    } catch (IOException e) {
        LogUtil.logException(LogUtil.LogType.SEARCH, e);
        error += e.toString();
    }
    IndexSearcher isearcher = new IndexSearcher(ireader);
    Query query = null;

    try {

        LogUtil.log(LogUtil.LogType.SEARCH,
                "query: " + searchQuery + ", vid: " + volumeId + ", path: " + searchPath);

        // Parse a simple query that searches for "text":
        QueryParser parser = new QueryParser(CConst.FIELD_CONTENT, m_analyzer);
        query = parser.parse(searchQuery);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS);
        isearcher.search(query, collector);
        scoreDocs = collector.topDocs().scoreDocs;

    } catch (ParseException | IOException e) {
        LogUtil.logException(LogUtil.LogType.SEARCH, e);
        error += e.toString();
    }
    // Iterate through the results creating an object for each file
    HashMap<String, Integer> hitCounts = new HashMap<>();
    HashMap<String, Integer> hitIndexes = new HashMap<>();

    /**
     * First iterate the hit list and count duplicates based on file path.
     */
    for (int ii = 0; scoreDocs != null && ii < scoreDocs.length; ++ii) {

        Document hitDoc = null;
        int fileHits = 1;
        try {
            hitDoc = isearcher.doc(scoreDocs[ii].doc);

            Explanation explanation = isearcher.explain(query, scoreDocs[ii].doc);
            Explanation[] details = explanation.getDetails();
            String description = details[0].getDescription();

            /**
             * FIXME, find a better way to count hits in each file
             */
            if (description.contains("=")) {

                String[] lineParts = description.split("=");
                String[] elementParts = lineParts[2].split(Pattern.quote(")"));
                if (elementParts.length > 0) {

                    fileHits = ((int) Double.parseDouble(elementParts[0]));
                }
            }

        } catch (IOException e) {
            LogUtil.logException(LogUtil.LogType.SEARCH, e);
            error += e.toString();
        }
        String filePath = hitDoc.get((CConst.FIELD_PATH));

        if (hitCounts.containsKey(filePath)) {

            hitCounts.put(filePath, hitCounts.get(filePath) + fileHits);
        } else {
            hitCounts.put(filePath, fileHits);
            hitIndexes.put(filePath, ii);
        }
    }

    /**
     * Iterate over each unique hit and save the results
     */
    for (Map.Entry<String, Integer> uniqueHit : hitIndexes.entrySet()) {

        Document hitDoc = null;
        try {
            hitDoc = isearcher.doc(scoreDocs[uniqueHit.getValue()].doc);
        } catch (IOException e) {
            LogUtil.logException(LogUtil.LogType.SEARCH, e);
            error += e.toString();
        }
        String file_name = hitDoc.get((CConst.FIELD_FILENAME));
        String file_path = hitDoc.get((CConst.FIELD_PATH));
        try {
            String folder_url = OmniHash.getStartPathUrl(ctx, volumeId, file_path);

            JSONObject hitObj = new JSONObject();
            hitObj.put("volume_id", volumeId);
            hitObj.put("file_path", file_path);
            hitObj.put("file_name", file_name);
            hitObj.put("folder_url", folder_url);
            hitObj.put("num_hits", hitCounts.get(file_path));
            hitObj.put("error", error);
            jsonArray.put(hitObj);

        } catch (Exception e) {
            LogUtil.logException(LogUtil.LogType.SEARCH, e);
        }
    }
    int num_hits = scoreDocs != null ? scoreDocs.length : 0;

    try {
        result.put("hits", jsonArray != null ? jsonArray : new JSONArray());
        result.put("num_hits", num_hits);
        result.put("error", error);

        ireader.close();
        m_directory.close();

    } catch (JSONException | IOException e) {
        LogUtil.logException(LogUtil.LogType.SEARCH, e);
    }

    return result;
}

From source file:com.paladin.action.SearchAction.java

License:Apache License

/**
 * search using lucene/*from www.  jav  a2  s.c o  m*/
 *
 * @param jsonObject
 * @param request
 * @param _query
 * @throws IOException
 * @throws ParseException
 */

private void _search(JSONObject jsonObject, HttpServletRequest request, String _query, String _table)
        throws IOException, ParseException, InvalidTokenOffsetsException {

    //  Bean ???
    final String index_dir = Constants.LUCENE_INDEX_ROOT + _table;

    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index_dir)));
    QueryParser parser = new QueryParser(Version.LUCENE_33, INDEX_FIELDS, new IKAnalyzer(false));
    TopScoreDocCollector collector = TopScoreDocCollector.create(10000, true);

    for (String key : _query.split(" ")) {
        Query query = parser.parse(key);
        searcher.search(query, collector);

        //    IKSimilarity  
        searcher.setSimilarity(new IKSimilarity());

        // 
        int size = collector.getTotalHits();
        total_pages = (size + Constants.NUM_PER_PAGE_SEARCH - 1) / Constants.NUM_PER_PAGE_SEARCH;

        curr_page_number = getCurrentPage(request, 1, total_pages);

        // ?
        first_page = curr_page_number - 5 > 0 ? curr_page_number - 5 : 1;
        last_page = first_page + 10 >= total_pages ? total_pages : first_page + 10;

        // ?
        int begin = (curr_page_number - 1) * Constants.NUM_PER_PAGE_SEARCH;
        ScoreDoc[] score_docs = collector.topDocs(begin, Constants.NUM_PER_PAGE_SEARCH).scoreDocs;

        List<Document> doc_list = new ArrayList<Document>();
        for (ScoreDoc score_doc : score_docs)
            doc_list.add(searcher.doc(score_doc.doc));

        List<Map<String, String>> blog_list = getBlogListFromDocList(query, doc_list);

        jsonObject.put(_table + "_list", blog_list);
        jsonObject.put("p_start_" + _table, first_page);
        jsonObject.put("p_end_" + _table, last_page);
        jsonObject.put("curr_page_" + _table, curr_page_number);
        jsonObject.put("total_page_" + _table, total_pages);
        jsonObject.put("total_count_" + _table, size);
    }
}

From source file:com.paladin.sys.lucene.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 * <p/>
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * <p/>
 * End of input on {@code in} is treated like an empty line: it declines the
 * "collect more" prompt and quits the navigation prompt.
 *
 * @param in          source of the user's navigation commands
 * @param searcher    searcher to run the query with
 * @param query       query to execute
 * @param raw         if {@code true}, print only document id and score for each hit
 * @param interactive if {@code false}, print the first page and return
 * @throws IOException if searching, loading a document or reading input fails
 */
public static void doPagingSearch(final BufferedReader in, final IndexSearcher searcher, final Query query,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough hits to fill five pages up front.
    TopDocs top_result = searcher.search(query, 5 * numPerPage);

    // Total number of matching documents.
    int total_hits = top_result.totalHits;
    out.println(" " + total_hits + " ??");

    // Index range [start, end) of the page currently shown.
    int start = 0;
    int end = Math.min(total_hits, numPerPage);

    ScoreDoc[] hits = top_result.scoreDocs;
    while (true) {
        if (end > hits.length) {
            // The requested page lies beyond what was collected; offer to collect all hits.
            out.print("?? " + total_hits + " ? 1 - " + hits.length
                    + " ??(y/n) ");
            String line = in.readLine();
            // readLine() returns null at end of input; treat that like "no".
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, total_hits).scoreDocs;
        }

        end = Math.min(hits.length, start + numPerPage);
        for (int i = start; i < end; i++) {
            if (raw) {
                out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ".  " + path);
                String title = doc.get("title");
                if (title != null) {
                    out.println("   : " + title);
                }
            } else {
                out.println((i + 1) + ". " + " path ");
            }
        }

        // Non-interactive mode, or nothing to show: one page is all we print.
        if (!interactive || end == 0) {
            break;
        }

        // Navigation prompt.
        if (total_hits >= end) {
            boolean quit = false;
            while (true) {
                if (start - numPerPage >= 0) {
                    out.print("(p), ");
                }
                if (start + numPerPage < total_hits) {
                    out.print("(n), ");
                }
                out.println("(q)  (...)");

                String line = in.readLine();
                // Null means end of input; quit instead of dereferencing it.
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - numPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + numPerPage < total_hits) {
                        start += numPerPage;
                    }
                    break;
                } else {
                    try {
                        int page = Integer.parseInt(line);
                        // Pages are 1-based; 0 or a negative number would produce a
                        // negative start index into hits.
                        if (page >= 1 && (page - 1) * numPerPage < total_hits) {
                            start = (page - 1) * numPerPage;
                            break;
                        } else {
                            out.println("??");
                        }
                    } catch (NumberFormatException e) {
                        out.println("???");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(total_hits, start + numPerPage);
        }
    }
}

From source file:com.parallax.server.blocklyprop.servlets.HelpSearchServlet.java

/**
 * Handles a help search request.
 * <p>
 * Reads the "query" request parameter, searches the help index for up to 10 hits,
 * renders them as an HTML fragment in the "html" request attribute and forwards to the
 * results JSP. A missing or empty query, a query with no hits and a query that cannot
 * be parsed all forward to the results JSP without the "html" attribute. If no index
 * searcher is available, 503 Service Unavailable is sent.
 */
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
    String queryText = req.getParameter("query");

    if (!Strings.isNullOrEmpty(queryText)) {
        try {
            IndexSearcher indexSearcher = initialize();
            if (indexSearcher == null) {
                // Previously this returned without writing anything, leaving the client
                // with a blank page; report the condition explicitly instead.
                resp.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE);
                return;
            }

            Query query = parser.parse(queryText);
            TopDocs hits = indexSearcher.search(query, 10);

            if (hits.totalHits > 0) {
                StringBuilder s = new StringBuilder();

                // NOTE(review): field values are written into the markup unescaped;
                // confirm the indexed help content is trusted.
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    String title = document.get("title");
                    String shortDescription = document.get("short");
                    String path = document.get("path");

                    s.append("<div class='result'><div class='result-body'><h4><a href='help?f=").append(path)
                            .append("'>").append(title).append("</a></h4>");
                    if (shortDescription != null) {
                        s.append("<p>");
                        s.append(shortDescription);
                        s.append("</p>");
                    }
                    s.append("</div></div>");
                }

                req.setAttribute("html", s.toString());
            }
        } catch (ParseException ex) {
            // The query text comes from the user; log it and fall through so the results
            // page is still rendered (without hits) rather than leaving the response empty.
            Logger.getLogger(HelpSearchServlet.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    req.getRequestDispatcher("/WEB-INF/servlet/help/help-results.jsp").forward(req, resp);
}

From source file:com.partydj.search.LuceneSearchProvider.java

License:Open Source License

/**
 * Finds media files matching the given query parameters.
 * <p>
 * Only the {@code ANY} parameter is currently turned into a query; its first value is
 * encoded with {@code INDEX_ENCODER} and parsed against the {@code ANY} field. The
 * number of results is limited by the first {@code MAX_RESULTS} value, or
 * {@code DEFAULT_MAX_RESULTS} when absent.
 *
 * @param queryParameters parameter name to values; may be {@code null} or empty
 * @return the matching media files in score order; empty when there are no parameters,
 *         no usable query text, or the search fails
 */
@Override
public Collection<MediaFile> find(Map<String, Collection<String>> queryParameters) {
    // The guard used to test a freshly created (hence always empty) StringBuilder, which
    // made the method return an empty list unconditionally. Test the input instead.
    if (queryParameters == null || queryParameters.isEmpty()) {
        return Collections.emptyList();
    }

    StringBuilder queryString = new StringBuilder();
    String any = getFirst(queryParameters, ANY);
    if (any != null) {
        queryString.append(INDEX_ENCODER.encode(any));
    } else {
        //$MR todo
    }
    if (queryString.length() == 0) {
        // Nothing to search for.
        return Collections.emptyList();
    }

    List<MediaFile> found = new ArrayList<MediaFile>();
    try {
        Query query = new QueryParser(Version.LUCENE_CURRENT, ANY, analyzer).parse(queryString.toString());
        Integer hitsPerPage = getFirstInteger(queryParameters, MAX_RESULTS);
        if (hitsPerPage == null) {
            hitsPerPage = DEFAULT_MAX_RESULTS;
        }
        IndexSearcher searcher = new IndexSearcher(index, true);
        try {
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            //$MR jaro-winlker scorer
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                found.add(indexToMediaFile.get(d));
            }
        } finally {
            // Release the searcher opened for this call.
            searcher.close();
        }
    } catch (Exception e) {
        // Best effort: a failed search yields whatever was collected so far.
        e.printStackTrace();
    }
    return found;
}

From source file:com.qahit.jbug.LuceneManager.java

/**
 * Searches the bug index across all indexed fields and returns the ids of the matching
 * bugs.
 * <p>
 * NOTE(review): the result limit passed to Lucene is the class-level {@code interval},
 * not the {@code n} parameter, which is unused here — confirm which one is intended.
 *
 * @param terms           query text
 * @param n               currently unused (see note above)
 * @param defaultOperator operator applied between terms that have no explicit operator
 * @return the "bug_id" values of the hits in score order, or {@code null} if the search
 *         failed (the error is logged)
 */
public static ArrayList<String> search(String terms, int n, QueryParser.Operator defaultOperator) {
    IndexSearcher indexSearcher = null;

    try {
        indexSearcher = searcherManager.acquire();
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_45,
                new String[] { "easiness", "targetmilestone", "version", "component", "reporter", "product",
                        "description", "comments", "title", "status", "assignedto", "bug_id", "priority" },
                analyzer);
        queryParser.setDefaultOperator(defaultOperator);
        Query query = queryParser.parse(terms);

        ScoreDoc[] scoreDocs = indexSearcher.search(query, interval).scoreDocs;
        ArrayList<String> result = new ArrayList<>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            result.add(doc.get("bug_id"));
        }
        return result;
    } catch (IOException | ParseException ex) {
        log.error("Error while searching", ex);
    } finally {
        // acquire() may have failed, leaving nothing to release; releasing null would
        // throw from this finally block and hide the original error.
        if (indexSearcher != null) {
            try {
                searcherManager.release(indexSearcher);
            } catch (IOException ex) {
                log.error("Error while releasing indexSearcher", ex);
            }
        }
    }
    return null;
}

From source file:com.radialpoint.word2vec.lucene.SearchFiles.java

License:Open Source License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents pages of size n to the user.
 * The user can then go to the next page if interested in the next hits.
 * /*from  ww w  .ja  va 2 s  . c om*/
 * When the query is executed for the first time, then only enough results are collected to fill 5 result pages. If
 * the user wants to page beyond this limit, then the query is executed another time and all hits are collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    // test hook
    cachedHits.clear();

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                cachedHits.add(path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:com.rapidminer.search.GlobalSearchHandler.java

License:Open Source License

/**
 * Creates the search result for search methods.
 *
 * @param searchTerm/*from w ww . java  2 s.c om*/
 *       the search string
 * @param searcher
 *       the index searcher instance which was used to search
 * @param result
 *       the result of the search
 * @param highlightResult
 *       if {@code true}, the {@link GlobalSearchResult#getBestFragments()} will be created
 * @return the search result instance, never {@code null}
 * @throws IOException
 *       if something goes wrong
 */
private GlobalSearchResult createSearchResult(final String searchTerm, final Query parsedQuery,
        final IndexSearcher searcher, final TopDocs result, final boolean highlightResult) throws IOException {
    int resultNumber = result.scoreDocs.length;
    List<Document> resultList = new ArrayList<>(resultNumber);
    List<String[]> highlights = highlightResult ? new LinkedList<>() : null;
    ScoreDoc lastResult = resultNumber > 0 ? result.scoreDocs[result.scoreDocs.length - 1] : null;
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        resultList.add(doc);

        if (highlightResult) {
            // search result highlighting best match on name field
            QueryScorer scorer = new QueryScorer(parsedQuery);
            Highlighter highlighter = new Highlighter(HIGHLIGHT_FORMATTER, scorer);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);
            try {
                TokenStream stream = TokenSources.getTokenStream(GlobalSearchUtilities.FIELD_NAME,
                        searcher.getIndexReader().getTermVectors(scoreDoc.doc),
                        doc.get(GlobalSearchUtilities.FIELD_NAME), GlobalSearchUtilities.ANALYZER,
                        Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE - 1);
                if (stream != null) {
                    highlights.add(highlighter.getBestFragments(stream,
                            doc.get(GlobalSearchUtilities.FIELD_NAME), MAX_NUMBER_OF_FRAGMENTS));
                } else {
                    highlights.add(null);
                }
            } catch (InvalidTokenOffsetsException e) {
                highlights.add(null);
            }
        }
    }
    return new GlobalSearchResult(resultList, searchTerm, lastResult, result.totalHits, highlights);
}