Usage examples for org.apache.lucene.search.IndexSearcher#doc
public Document doc(int docID, Set<String> fieldsToLoad) throws IOException
.getIndexReader().document(docID, fieldsToLoad)
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
/**
 * Looks up the single Lucene document whose "docId" field equals the given id.
 *
 * @param docId        value of the indexed "docId" term to look up; expected to be unique
 * @param attachment   true to search the attachment (blob) index, false for the main index.
 *                     NOTE(review): boxed Boolean is auto-unboxed below — a null argument
 *                     would throw NullPointerException; confirm callers never pass null.
 * @param fieldsToLoad stored fields to load, or null to load all stored fields
 * @return the matching document, or null if no document with this id was found
 * @throws IOException if the index cannot be opened or read
 */
private org.apache.lucene.document.Document getLDoc(String docId, Boolean attachment, Set<String> fieldsToLoad)
        throws IOException {
    IndexSearcher searcher = null;
    if (!attachment) {
        // lazily open the main-index searcher and cache it in the field.
        // NOTE(review): this lazy init is not synchronized — not thread-safe if
        // getLDoc can be called concurrently; confirm single-threaded use.
        if (isearcher == null) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            isearcher = new IndexSearcher(ireader);
        }
        searcher = isearcher;
    } else {
        // same lazy init for the attachment (blob) index searcher
        if (isearcher_blob == null) {
            DirectoryReader ireader = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader);
        }
        searcher = isearcher_blob;
    }
    // exact-term lookup; ask for only 1 hit since docId is expected to be unique
    TermQuery q = new TermQuery(new Term("docId", docId));
    TopDocs td = searcher.search(q, 1); // there must be only 1 doc with this id anyway
    Util.softAssert(td.totalHits <= 1, "docId = " + docId + " is not unique. Found: " + td.totalHits + " hits!");
    ScoreDoc[] sd = td.scoreDocs;
    if (sd.length != 1) {
        // something went wrong... report it and ignore this doc
        Util.warnIf(true, "lookup failed for id " + docId);
        return null;
    }
    // load only the requested stored fields when a selection is given
    if (fieldsToLoad != null)
        return searcher.doc(sd[0].doc, fieldsToLoad);
    else
        return searcher.doc(sd[0].doc);
}
From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java
License:Apache License
/** * This determines the possible facets for each of the categories. For example - possible authors * for the display. This does not care about counts later on counts will be important. * /*from www .j a v a2 s . c o m*/ * @param topDocs - top doucment hits found * @param numberOfHitsToProcess * @return * @throws CorruptIndexException * @throws IOException */ private HashMap<String, HashMap<String, FacetResult>> generateFacetSearches(TopDocs topDocs, int numberOfHitsToProcess, int numberOfResultsToCollect, IndexSearcher searcher) throws CorruptIndexException, IOException { String[] fieldsToLoad = { DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES, DefaultInstitutionalItemIndexService.LANGUAGE, DefaultInstitutionalItemIndexService.KEY_WORDS, DefaultInstitutionalItemIndexService.CONTENT_TYPES, DefaultInstitutionalItemIndexService.COLLECTION_NAME, DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES }; MapFieldSelector fieldSelector = new MapFieldSelector(fieldsToLoad); HashMap<String, HashMap<String, FacetResult>> facets = new HashMap<String, HashMap<String, FacetResult>>(); HashMap<String, FacetResult> authorsMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> languagesMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> subjectsMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> formatsMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> collectionMap = new HashMap<String, FacetResult>(); facets.put(AUTHOR_MAP, authorsMap); facets.put(LANGUAGE_MAP, languagesMap); facets.put(SUBJECT_MAP, subjectsMap); facets.put(FORMAT_MAP, formatsMap); facets.put(COLLECTION_MAP, collectionMap); int length = topDocs.totalHits; if (length <= numberOfHitsToProcess) { numberOfHitsToProcess = length; } for (int index = 0; index < numberOfHitsToProcess; index++) { Document doc = searcher.doc(topDocs.scoreDocs[index].doc, fieldSelector); String[] names = 
doc.getValues(DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES); String language = doc.get(DefaultInstitutionalItemIndexService.LANGUAGE); String[] subjects = doc.getValues(DefaultInstitutionalItemIndexService.KEY_WORDS); String[] formats = doc.getValues(DefaultInstitutionalItemIndexService.CONTENT_TYPES); String collection = doc.get(DefaultInstitutionalItemIndexService.COLLECTION_NAME); if (collection != null) { collection = collection.trim(); FacetResult f = collectionMap.get(collection); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.COLLECTION_NAME, collection); collectionMap.put(collection, f); } } if (authorsMap.size() < numberOfResultsToCollect) { int count = 0; while (count < names.length && authorsMap.size() < numberOfResultsToCollect) { FacetResult f = authorsMap.get(names[count]); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES, names[count]); authorsMap.put(names[count], f); } count = count + 1; } } if (language != null && languagesMap.size() < numberOfResultsToCollect) { language = language.trim(); FacetResult f = languagesMap.get(language); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.LANGUAGE, language); languagesMap.put(language, f); } } if (subjectsMap.size() < numberOfResultsToCollect) { int count = 0; while (count < subjects.length && subjectsMap.size() < numberOfResultsToCollect) { FacetResult f = subjectsMap.get(subjects[count]); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.KEY_WORDS, subjects[count]); subjectsMap.put(subjects[count], f); } count = count + 1; } } if (formatsMap.size() < numberOfResultsToCollect) { int count = 0; while (count < formats.length && formatsMap.size() < numberOfResultsToCollect) { FacetResult f = formatsMap.get(formats[count]); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.CONTENT_TYPES, formats[count]); formatsMap.put(formats[count], 
f); } count = count + 1; } } doc = null; } return facets; }
From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java
License:Apache License
/** * Process the possible facets and determine the number of hits for each facet accross the main query. * /* ww w . j a v a 2s .co m*/ * @param possibleFacets - possible facets to show to the user * @param reader - lucene reader * @param mainQueryBits - bitset from the main query * @param facetResults - set of facet results * @param hits - number of hits * @param numberOfIdsToCollect - number of ids to collect and show to user * @param mainQueryString - main query * * @return - search helper * @throws ParseException * @throws IOException */ private FacetSearchHelper processPossibleFacets(HashMap<String, HashMap<String, FacetResult>> possibleFacets, IndexReader reader, OpenBitSetDISI mainQueryBits, HashMap<String, Collection<FacetResult>> facetResults, TopDocs hits, int numberOfIdsToCollect, int idsToCollectStartPosition, int numberOfFacetsToShow, String mainQueryString, IndexSearcher searcher) throws ParseException, IOException { FacetResultHitComparator facetResultHitComparator = new FacetResultHitComparator(); // get the authors and create a facet for each author // determine the number of hits the author has in the main query HashMap<String, FacetResult> authorFacetMap = possibleFacets.get(AUTHOR_MAP); LinkedList<FacetResult> authorFacets = new LinkedList<FacetResult>(); authorFacets.addAll(authorFacetMap.values()); processFacetCategory(authorFacets, reader, mainQueryBits, searcher); Collections.sort(authorFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalAuthorFacets; if (authorFacets.size() <= numberOfFacetsToShow) { finalAuthorFacets = authorFacets; } else { finalAuthorFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalAuthorFacets.add(authorFacets.get(index)); } } facetResults.put(AUTHOR_MAP, finalAuthorFacets); // get the subjects and create a facet for each subject // determine the number of hits the subject has in the main query HashMap<String, FacetResult> 
subjectFacetMap = possibleFacets.get(SUBJECT_MAP); LinkedList<FacetResult> subjectFacets = new LinkedList<FacetResult>(); subjectFacets.addAll(subjectFacetMap.values()); processFacetCategory(subjectFacets, reader, mainQueryBits, searcher); Collections.sort(subjectFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalSubjectFacets; if (subjectFacets.size() <= numberOfFacetsToShow) { finalSubjectFacets = subjectFacets; } else { finalSubjectFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalSubjectFacets.add(subjectFacets.get(index)); } } facetResults.put(SUBJECT_MAP, finalSubjectFacets); // get the language and create a facet for each language // determine the number of hits the language has in the main query HashMap<String, FacetResult> languageFacetMap = possibleFacets.get(LANGUAGE_MAP); LinkedList<FacetResult> languageFacets = new LinkedList<FacetResult>(); languageFacets.addAll(languageFacetMap.values()); processFacetCategory(languageFacets, reader, mainQueryBits, searcher); Collections.sort(languageFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalLanguageFacets; if (languageFacets.size() <= numberOfFacetsToShow) { finalLanguageFacets = languageFacets; } else { finalLanguageFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalLanguageFacets.add(languageFacets.get(index)); } } facetResults.put(LANGUAGE_MAP, finalLanguageFacets); // get the format and create a facet for each format // determine the number of hits the format has in the main query HashMap<String, FacetResult> formatFacetMap = possibleFacets.get(FORMAT_MAP); LinkedList<FacetResult> formatFacets = new LinkedList<FacetResult>(); formatFacets.addAll(formatFacetMap.values()); processFacetCategory(formatFacets, reader, mainQueryBits, searcher); Collections.sort(formatFacets, facetResultHitComparator); // final holder of 
facets LinkedList<FacetResult> finalFormatFacets; if (formatFacets.size() <= numberOfFacetsToShow) { finalFormatFacets = formatFacets; } else { finalFormatFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalFormatFacets.add(formatFacets.get(index)); } } facetResults.put(FORMAT_MAP, finalFormatFacets); // get the format and create a facet for each format // determine the number of hits the format has in the main query HashMap<String, FacetResult> collectionFacetMap = possibleFacets.get(COLLECTION_MAP); LinkedList<FacetResult> collectionFacets = new LinkedList<FacetResult>(); collectionFacets.addAll(collectionFacetMap.values()); processFacetCategory(collectionFacets, reader, mainQueryBits, searcher); Collections.sort(collectionFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalCollectionFacets; if (collectionFacets.size() <= numberOfFacetsToShow) { finalCollectionFacets = collectionFacets; } else { finalCollectionFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalCollectionFacets.add(collectionFacets.get(index)); } } facetResults.put(COLLECTION_MAP, finalCollectionFacets); HashSet<Long> ids = new HashSet<Long>(); // end position of id's to collect will be start position plus the number to collect int endPosition = idsToCollectStartPosition + numberOfIdsToCollect; // make sure that the end position is set up correctly if (hits.totalHits < endPosition) { endPosition = hits.totalHits; } String[] fieldsToLoad = { DefaultInstitutionalItemIndexService.ID }; MapFieldSelector fieldSelector = new MapFieldSelector(fieldsToLoad); for (int index = idsToCollectStartPosition; index < endPosition; index++) { Document doc = searcher.doc(hits.scoreDocs[index].doc, fieldSelector); ids.add(NumericUtils.prefixCodedToLong(doc.get(DefaultInstitutionalItemIndexService.ID))); } FacetSearchHelper helper = new FacetSearchHelper(ids, 
hits.totalHits, facetResults, mainQueryString); return helper; }
From source file:io.puntanegra.fhir.index.lucene.LuceneDocumentIterator.java
License:Apache License
private void fetch() { try {//from w w w. j ava 2s .c o m IndexSearcher searcher = manager.acquire(); try { TimeCounter time = TimeCounter.create().start(); // Search for top documents TopDocs topDocs = null; if (this.sort != null) { sort = sort.rewrite(searcher); topDocs = searcher.searchAfter(after, query, page, sort); } else { topDocs = searcher.searchAfter(after, query, page); } ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Check inf mayHaveMore mayHaveMore = scoreDocs.length == page; // Collect the documents from query result for (ScoreDoc scoreDoc : scoreDocs) { Document document = searcher.doc(scoreDoc.doc, fields); documents.add(Pair.create(document, scoreDoc)); after = scoreDoc; } logger.debug("Get page with {} documents in {}", scoreDocs.length, time.stop()); } finally { manager.release(searcher); } } catch (Exception e) { e.printStackTrace(); throw new FhirIndexException(e, "Error searching in with %s and %s", query, sort); } }
From source file:io.puntanegra.fhir.index.lucene.LuceneRAMIndex.java
License:Apache License
/** * Finds the top {@code count} hits for {@code query} and sorting the hits * by {@code sort}./* w ww.ja va 2 s . com*/ * * @param query * the {@link Query} to search for * @param sort * the {@link Sort} to be applied * @param count * the max number of results to be collected * @param fields * the names of the fields to be loaded * @return the found documents */ public List<Document> search(Query query, Sort sort, Integer count, Set<String> fields) { try { indexWriter.commit(); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); sort = sort.rewrite(searcher); TopDocs topDocs = searcher.search(query, count, sort); ScoreDoc[] scoreDocs = topDocs.scoreDocs; List<Document> documents = new LinkedList<>(); for (ScoreDoc scoreDoc : scoreDocs) { Document document = searcher.doc(scoreDoc.doc, fields); documents.add(document); } searcher.getIndexReader().close(); return documents; } catch (IOException e) { throw new FhirIndexException(e, "Error while searching"); } }
From source file:it.pronetics.madstore.repository.index.impl.LuceneSearcher.java
License:Apache License
/**
 * Runs the query against the thread-local searcher and returns one page of results.
 * A {@code max} of 0 falls back to the default page size. Hits missing either the
 * collection key or the entry key are logged and skipped.
 *
 * @param query  the query to execute
 * @param offset index of the first hit to return
 * @param max    page size (0 means the default maximum)
 * @return paging list of search results for the requested window
 * @throws Exception on any search failure
 */
private PagingList<SearchResult> doSearch(Query query, int offset, int max) throws Exception {
    if (max == 0) {
        max = LuceneIndexManager.DEFAULT_MAX_SEARCH_RESULTS;
    }
    int limit = offset + max;
    IndexSearcher searcher = threadLocalSeacher.get();
    TopFieldDocs topFieldDocs = searcher.search(query, null, limit, new Sort(SortField.FIELD_SCORE));
    PagingList<SearchResult> results = new PagingList<SearchResult>(new ArrayList<SearchResult>(), offset,
            max, topFieldDocs.totalHits);
    // only the two key fields are needed, so load just those
    MapFieldSelector keySelector = new MapFieldSelector(
            new String[] { LuceneIndexManager.INDEX_COLLECTION_KEY, LuceneIndexManager.INDEX_ENTRY_KEY });
    int end = Math.min(limit, topFieldDocs.totalHits);
    for (int hit = offset; hit < end; hit++) {
        Document document = searcher.doc(topFieldDocs.scoreDocs[hit].doc, keySelector);
        String collectionKey = document.get(LuceneIndexManager.INDEX_COLLECTION_KEY);
        String entryKey = document.get(LuceneIndexManager.INDEX_ENTRY_KEY);
        if (collectionKey == null || entryKey == null) {
            LOG.warn("Found an entry with missing collection ({}) or entry ({}) key.", collectionKey,
                    entryKey);
        } else {
            results.add(new SearchResult(collectionKey, entryKey));
        }
    }
    return results;
}