Example usage of org.apache.lucene.search.IndexSearcher.doc(int)

Introduction

On this page you can find example usages of the method org.apache.lucene.search.IndexSearcher.doc(int).

Prototype

public Document doc(int docID) throws IOException

Source Link

Document

Shorthand ("sugar") for calling getIndexReader().document(docID).

Usage

From source file:com.github.tteofili.looseen.MinHashClassifier.java

License:Apache License

/**
 * Turns the hits in {@code topDocs} into one scored result per category value.
 *
 * <p>For every hit that carries the field {@code categoryFieldName}, the category's hit count
 * is incremented and the hit's score relative to {@code topDocs.getMaxScore()} is added to the
 * category's boost. Each category is then scored as {@code (count * (boost / count)) / k}. If
 * fewer than {@code k} hits carried the field, every score is rescaled by {@code k / sumdoc}.
 *
 * @param searcher          searcher used to load the stored fields of each hit
 * @param categoryFieldName name of the field holding the category value
 * @param topDocs           the hits to aggregate
 * @param k                 number of neighbours the scores are normalized against
 * @return one classification result per distinct category value found in the hits
 * @throws IOException if a document cannot be loaded
 */
List<ClassificationResult<BytesRef>> buildListFromTopDocs(IndexSearcher searcher, String categoryFieldName,
        TopDocs topDocs, int k) throws IOException {
    Map<BytesRef, Integer> hitsPerClass = new HashMap<>();
    // Accumulated relative scores of the hits belonging to each class.
    Map<BytesRef, Double> boostPerClass = new HashMap<>();
    float bestScore = topDocs.getMaxScore();

    for (ScoreDoc hit : topDocs.scoreDocs) {
        IndexableField categoryField = searcher.doc(hit.doc).getField(categoryFieldName);
        if (categoryField == null) {
            // Hits without the category field do not contribute to any class.
            continue;
        }
        BytesRef category = new BytesRef(categoryField.stringValue());

        Integer seen = hitsPerClass.get(category);
        hitsPerClass.put(category, seen == null ? 1 : seen + 1);

        // The boost of a single hit is its score relative to the best score.
        double relativeScore = hit.score / bestScore;
        Double accumulated = boostPerClass.get(category);
        boostPerClass.put(category, accumulated == null ? relativeScore : accumulated + relativeScore);
    }

    List<ClassificationResult<BytesRef>> scored = new ArrayList<>();
    int sumdoc = 0;
    for (Map.Entry<BytesRef, Integer> perClass : hitsPerClass.entrySet()) {
        BytesRef category = perClass.getKey();
        Integer hits = perClass.getValue();
        Double averageBoost = boostPerClass.get(category) / hits;
        double score = (hits * averageBoost) / (double) k;
        scored.add(new ClassificationResult<>(category.clone(), score));
        sumdoc += hits;
    }

    if (sumdoc >= k) {
        return scored;
    }

    // Fewer than k hits had a category: rescale so the scores are relative to the hits that did.
    List<ClassificationResult<BytesRef>> corrected = new ArrayList<>();
    for (ClassificationResult<BytesRef> result : scored) {
        double rescaled = result.getScore() * k / (double) sumdoc;
        corrected.add(new ClassificationResult<>(result.getAssignedClass(), rescaled));
    }
    return corrected;
}

From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java

License:Open Source License

/**
 * Searches the index for {@code keyword} and returns one page of highlighted results.
 *
 * <p>The page is located with {@code searchAfter}, starting from the last hit of the previous
 * page as computed by {@code getLastScoreDoc}. The collected results are sorted with
 * {@code Collections.sort} before being wrapped in the response.
 *
 * @param pageNumber page to fetch, passed to {@code getLastScoreDoc}
 * @param pageSize   maximum number of hits on the page
 * @param keyword    text to search for in the "title" and "descripton" fields
 * @return the page of results together with the total hit count, or {@code null} if the search
 *         failed (the failure is logged)
 */
public TableResultResponse<IndexObject> page(Integer pageNumber, Integer pageSize, String keyword) {
    TableResultResponse<IndexObject> pageQuery = null;
    List<IndexObject> searchResults = new ArrayList<>();
    // try-with-resources closes the reader on every path and, unlike a bare close() in a
    // finally block, cannot fail with a NullPointerException when opening the reader failed.
    try (IndexReader indexReader = DirectoryReader.open(this.getDirectory())) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // NOTE(review): "descripton" looks like a typo, but it must match the field name used
        // at indexing time - confirm before changing it.
        Query query = QueryUtil.query(keyword, this.getAnalyzer(), "title", "descripton");
        ScoreDoc lastScoreDoc = this.getLastScoreDoc(pageNumber, pageSize, query, indexSearcher);
        // Fetch the documents that follow the last hit of the previous page.
        TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);
        Highlighter highlighter = this.addStringHighlighter(query);
        log.info("??{}", keyword);
        log.info("{}", topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int docID = scoreDoc.doc;
            float score = scoreDoc.score;
            Document document = indexSearcher.doc(docID);
            IndexObject indexObject = DocumentUtil.document2IndexObject(this.getAnalyzer(), highlighter,
                    document, score);
            searchResults.add(indexObject);
            log.info("" + score);
        }
        Collections.sort(searchResults);
        pageQuery = new TableResultResponse<>(topDocs.totalHits, searchResults);
    } catch (Exception e) {
        // Best effort: callers receive null on failure, exactly as before, but the cause goes
        // to the application log instead of stderr.
        log.error("Search failed for keyword " + keyword, e);
    }
    return pageQuery;
}

From source file:com.globalsight.ling.lucene.Hits.java

License:Apache License

/**
 * Fills {@code m_hits} with the hits ranked in {@code [begin, end)} whose stored text contains
 * the search text, or is contained in it, ignoring case.
 *
 * <p>Processing stops at the first hit scoring below {@code p_minScore}.
 *
 * @param searcher   searcher used to load the stored fields of each hit
 * @param p_hits     the scored hits to inspect
 * @param end        exclusive upper rank; clamped to {@code p_hits.length}
 * @param begin      inclusive lower rank
 * @param p_minScore minimum score a hit must have to be considered
 * @param text       the text that was searched for
 * @throws IOException if a document cannot be loaded
 */
private void dealHits(IndexSearcher searcher, ScoreDoc[] p_hits, int end, int begin, float p_minScore,
        String text) throws IOException {
    // Clamp before allocating so the list is never presized beyond the hits that exist.
    if (p_hits.length < end) {
        end = p_hits.length;
    }
    m_hits = new ArrayList(Math.max(end, 0));
    text = text.toLowerCase();

    for (int i = begin; i < end; i++) {
        float score = p_hits[i].score;

        if (score < p_minScore) {
            break;
        }

        Document doc = searcher.doc(p_hits[i].doc);

        // Document.get returns null when the field is not stored for this document.
        String stored = doc.get(IndexDocument.TEXT);
        if (stored == null) {
            continue;
        }
        String str = stored.toLowerCase();

        if (text.indexOf(str) > -1 || str.indexOf(text) > -1) {
            m_hits.add(new Hit(doc.get(IndexDocument.MAINID), doc.get(IndexDocument.SUBID),
                    stored, score));
        }
    }
}

From source file:com.globalsight.ling.lucene.Index.java

License:Apache License

/**
 * Executes a search in the index and returns the hits ranked in
 * {@code [begin, end)} that have no score smaller than p_minScore.
 *
 * This implementation is based on Lucene, and Lucene score values
 * vary widely, making it hard to specify a useful cut-off like
 * 0.7 or 0.5. Good scores can be < 0.2. All that is guaranteed is
 * that scores are numerically ordered. Prefer limiting the result
 * window with begin/end over relying on p_minScore.
 *
 * @param p_text     the text to search for
 * @param end        exclusive upper rank of the result window
 * @param begin      inclusive lower rank of the result window
 * @param p_minScore minimum score a hit must have
 * @return the matching hits
 * @throws IOException if the index is not open, the search fails, or the
 *         thread is interrupted while waiting for the read lock (the
 *         interrupt flag is restored in that case)
 * @throws InterruptedException declared for backward compatibility
 */
public Hits search(String p_text, int end, int begin, float p_minScore)
        throws IOException, InterruptedException {
    synchronized (m_state) {
        if (m_state != STATE_OPENED) {
            throw new IOException("index is not available");
        }
    }

    try {
        m_lock.readLock().acquire();

        try {
            // The searcher comes from a shared cache, so it is not closed here.
            IndexSearcher searcher = LuceneCache.getLuceneCache(m_directory).getIndexSearcher();

            Query query = getQuery(p_text);

            // Hits are addressed by absolute rank below (scoreDocs[begin..end)), so the top
            // `end` documents must be collected; asking for only end - begin documents left
            // the window empty whenever begin > 0.
            TopDocs topDocs = searcher.search(query, end);

            if (topDocs.totalHits > 0) {
                noResult = false;
            }

            // Store results in our own object.
            Hits result = new Hits(searcher, topDocs.scoreDocs, end, begin, p_minScore, p_text);

            // Highlight query terms in long results.
            if (m_type == TYPE_TEXT) {
                // Note: rewrite MultiTermQuery, RangeQuery or PrefixQuery.

                // TODO: optimize object creation if it all works.
                Highlighter highlighter = new Highlighter(new SimpleFormatter(), new QueryScorer(query));

                // Bound by the documents actually collected; totalHits can exceed
                // scoreDocs.length and would overrun the array.
                int max = Math.min(end, topDocs.scoreDocs.length);
                for (int i = begin; i < max; i++) {
                    Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
                    String text = doc.get(IndexDocument.TEXT);
                    if (text == null) {
                        // Nothing stored to highlight for this document.
                        continue;
                    }

                    TokenStream tokenStream = m_analyzer.tokenStream(IndexDocument.TEXT,
                            new StringReader(text));
                    tokenStream.reset();

                    // Get 3 best fragments and separate with "..."
                    String hilite = highlighter.getBestFragments(tokenStream, text, 3, "...");

                    // NOTE(review): i is a rank in scoreDocs; confirm that Hits.getHit uses
                    // the same indexing, since Hits may filter out some documents.
                    result.getHit(i).setText(hilite);
                }
            }

            return result;
        } finally {
            m_lock.readLock().release();
        }
    } catch (InterruptedException ex) {
        // Restore the interrupt flag and keep the cause so callers can still see the interruption.
        Thread.currentThread().interrupt();
        throw new IOException(ex.getMessage(), ex);
    }
}

From source file:com.google.gerrit.server.change.ReviewerSuggestionCache.java

License:Apache License

/**
 * Suggests accounts whose indexed fields start with the words of {@code query}.
 *
 * <p>The query is lower-cased and split on spaces. An account matches when, for at least one of
 * the fields in {@code ALL}, every word is a prefix match in that field.
 *
 * @param query free-text input
 * @param n     maximum number of accounts to return
 * @return the matching accounts, or an empty list when no searcher is available
 * @throws IOException if the index cannot be read
 */
List<AccountInfo> search(String query, int n) throws IOException {
    final IndexSearcher index = get();
    if (index == null) {
        return Collections.emptyList();
    }

    final String lowered = query.toLowerCase();
    final List<String> words = Splitter.on(' ').omitEmptyStrings().splitToList(lowered);

    // One optional clause per field; inside a clause every word is required.
    final BooleanQuery anyField = new BooleanQuery();
    for (String fieldName : ALL) {
        final BooleanQuery everyWord = new BooleanQuery();
        for (String word : words) {
            PrefixQuery startsWith = new PrefixQuery(new Term(fieldName, word));
            everyWord.add(startsWith, Occur.MUST);
        }
        anyField.add(everyWord, Occur.SHOULD);
    }

    final ScoreDoc[] matches = index.search(anyField, n).scoreDocs;
    final List<AccountInfo> accounts = new LinkedList<>();
    for (int i = 0; i < matches.length; i++) {
        Document stored = index.doc(matches[i].doc);

        IndexableField id = checkNotNull(stored.getField(ID));
        AccountInfo account = new AccountInfo(id.numericValue().intValue());
        account.name = stored.get(NAME);
        account.email = stored.get(EMAIL);
        account.username = stored.get(USERNAME);
        accounts.add(account);
    }
    return accounts;
}

From source file:com.greplin.interval.BaseIntervalQueryTest.java

License:Apache License

/**
 * Asserts that {@code query} matches exactly the documents whose "id" field values are
 * {@code expectedResults}. Order is ignored, and at most 100 hits are inspected.
 *
 * @param searcher        searcher to run the query against
 * @param query           query under test
 * @param expectedResults the expected "id" values
 * @throws IOException if the index cannot be read
 */
protected void assertSearch(IndexSearcher searcher, Query query, Integer... expectedResults)
        throws IOException {
    Set<Integer> wanted = ImmutableSet.copyOf(expectedResults);

    ScoreDoc[] hits = searcher.search(query, 100).scoreDocs;
    Set<Integer> found = Sets.newHashSet();
    for (int i = 0; i < hits.length; i++) {
        String id = searcher.doc(hits[i].doc).get("id");
        found.add(Integer.valueOf(id));
    }

    String message = query + " should match [" + Joiner.on(", ").join(expectedResults) + "]";
    Assert.assertEquals(message, wanted, found);
}

From source file:com.hin.hl7messaging.LicenseeSearchService.java

/**
 * Builds one {@code ProfileVO} per hit from the stored fields of the matching document.
 *
 * @param hits     the hits to convert
 * @param searcher searcher used to load each hit's document
 * @param searchVO not used by this method
 * @return a {@code List<ProfileVO>} in the same order as {@code hits}
 * @throws Exception if a document cannot be loaded
 */
public Object fillData(ScoreDoc[] hits, IndexSearcher searcher, SearchVO searchVO) throws Exception {
    List<ProfileVO> profiles = new ArrayList<ProfileVO>();
    for (ScoreDoc hit : hits) {
        Document stored = searcher.doc(hit.doc);

        ProfileVO profile = new ProfileVO();
        profile.setPrefixName(stored.get("namePrefix"));
        profile.setSuffixName(stored.get("nameSuffix"));
        // The display name is derived by the VO itself once prefix and suffix are set.
        profile.setName(profile.getFullName());
        profile.setSubscriberId(stored.get("subscriberId"));
        profile.setImageBase64(stored.get("image"));
        profile.setRole(stored.get("Role"));
        profile.setTelecom(stored.get("telecom"));

        profiles.add(profile);
    }
    return profiles;
}

From source file:com.icdd.lucence.SearchFiles.java

License:Apache License

/**
 * Runs {@code query} and prints the results page by page to standard output.
 *
 * <p>Initially enough hits for five pages are collected. In interactive mode the user can move
 * to the previous or next page, jump to a page number, or quit; when a page beyond the
 * collected hits is requested, the user is asked whether all hits should be collected. End of
 * input on {@code in} is treated like quitting.
 *
 * @param in          source of the user's commands in interactive mode
 * @param searcher    searcher to run the query against
 * @param query       the query to execute
 * @param hitsPerPage number of hits shown per page
 * @param raw         if true, print only document id and score instead of path and title
 * @param interactive if false, print the first page and return
 * @throws IOException if the index or the input cannot be read
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages. Time only the search itself, not the console output.
    long searchStarted = System.currentTimeMillis();
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    long searchMillis = System.currentTimeMillis() - searchStarted;
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    System.out.println("one search cost :" + searchMillis + "ms");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // readLine returns null at end of input; treat that like answering "n".
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) {
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title:" + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                // >= 0 so that the second page (start == hitsPerPage) offers "previous" too.
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                // End of input is treated like "q".
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException notANumber) {
                        // Anything that is not a command or a number is not a valid page.
                        System.out.println("No such page.");
                        continue;
                    }
                    // long arithmetic avoids overflow; pages are numbered from 1.
                    long pageStart = ((long) page - 1) * hitsPerPage;
                    if (page >= 1 && pageStart < numTotalHits) {
                        start = (int) pageStart;
                        break;
                    } else {
                        System.out.println("No such page.");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }

}

From source file:com.icdd.lucence.SearchFiles.java

License:Apache License

/**
 * Runs {@code query}, collecting up to five pages of hits, and appends the matching documents
 * to {@code docs}.
 *
 * @param docs        list the matching documents are appended to; also the return value
 * @param searcher    searcher to run the query against
 * @param query       the query to execute
 * @param hitsPerPage page size; {@code 5 * hitsPerPage} hits are collected
 * @param raw         not used by this method
 * @return {@code docs}, with the matching documents appended in rank order
 * @throws IOException if the index cannot be read
 */
public static List<Document> doPagingSearchWeb(List<Document> docs, IndexSearcher searcher, Query query,
        int hitsPerPage, boolean raw) throws IOException {
    // Collect enough docs to show 5 pages. Time only the search itself, not the console output.
    long searchStarted = System.currentTimeMillis();
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    long searchMillis = System.currentTimeMillis() - searchStarted;
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    System.out.println("one search cost :" + searchMillis + "ms");

    // NOTE(review): this message appears to have lost its original (non-ASCII) text - confirm.
    System.out.println("?");

    for (ScoreDoc scoreDoc : hits) {
        docs.add(searcher.doc(scoreDoc.doc));
    }
    return docs;
}

From source file:com.ikon.analysis.SearchDemo.java

License:Open Source License

/**
 * Search in documents/*from w  ww.  j a v  a2  s  .  c  o  m*/
 */
private static void search(Directory index, Analyzer analyzer, String str)
        throws ParseException, CorruptIndexException, IOException {
    IndexReader reader = IndexReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(NUM_HITS, true);
    //Query q = new QueryParser(Config.LUCENE_VERSION, DOC_FIELD, analyzer).parse(str);
    Query q = new WildcardQuery(new Term(DOC_FIELD, str));
    System.out.println("Query: " + q);

    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    System.out.println("Found " + hits.length + " hits.");

    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println((i + 1) + ". " + d.get(DOC_FIELD));
    }

    searcher.close();
}