Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID, Set<String> fieldsToLoad) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID, fieldsToLoad)

Usage

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Fetches the single Lucene document whose "docId" field equals {@code docId}.
 *
 * @param docId        unique identifier stored in the index's "docId" field
 * @param attachment   true to search the attachment (blob) index, false for the main
 *                     index; null is treated as false (defends against auto-unboxing NPE)
 * @param fieldsToLoad fields to load from the hit, or null to load all stored fields
 * @return the matching document, or null if no unique match was found
 * @throws IOException if the index cannot be opened or read
 */
private org.apache.lucene.document.Document getLDoc(String docId, Boolean attachment, Set<String> fieldsToLoad)
        throws IOException {
    IndexSearcher searcher = null;
    // Boolean.TRUE.equals() avoids the NullPointerException that plain auto-unboxing
    // of a null 'attachment' would cause; null behaves like false.
    if (!Boolean.TRUE.equals(attachment)) {
        // Lazily open (and cache) the searcher over the main index.
        if (isearcher == null) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            isearcher = new IndexSearcher(ireader);
        }
        searcher = isearcher;
    } else {
        // Lazily open (and cache) the searcher over the attachment/blob index.
        if (isearcher_blob == null) {
            DirectoryReader ireader = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader);
        }
        searcher = isearcher_blob;
    }

    TermQuery q = new TermQuery(new Term("docId", docId));
    TopDocs td = searcher.search(q, 1); // there must be only 1 doc with this id anyway
    Util.softAssert(td.totalHits <= 1,
            "docId = " + docId + " is not unique. Found: " + td.totalHits + " hits!");
    ScoreDoc[] sd = td.scoreDocs;
    if (sd.length != 1) {
        // something went wrong... report it and ignore this doc
        Util.warnIf(true, "lookup failed for id " + docId);
        return null;
    }

    if (fieldsToLoad != null)
        return searcher.doc(sd[0].doc, fieldsToLoad);
    else
        return searcher.doc(sd[0].doc);
}

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * This determines the possible facets for each of the categories.  For example - possible authors 
 * for the display.  This does not care about counts later on counts will be important.
 * @param topDocs - top document hits found
 * @param numberOfHitsToProcess
 * @return
 * @throws CorruptIndexException
 * @throws IOException
 */
private HashMap<String, HashMap<String, FacetResult>> generateFacetSearches(TopDocs topDocs,
        int numberOfHitsToProcess, int numberOfResultsToCollect, IndexSearcher searcher)
        throws CorruptIndexException, IOException {
    // Fields the facet maps need. (The original array listed CONTRIBUTOR_NAMES
    // twice; once is sufficient for the field selector.)
    String[] fieldsToLoad = { DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES,
            DefaultInstitutionalItemIndexService.LANGUAGE, DefaultInstitutionalItemIndexService.KEY_WORDS,
            DefaultInstitutionalItemIndexService.CONTENT_TYPES,
            DefaultInstitutionalItemIndexService.COLLECTION_NAME };

    MapFieldSelector fieldSelector = new MapFieldSelector(fieldsToLoad);
    HashMap<String, HashMap<String, FacetResult>> facets = new HashMap<String, HashMap<String, FacetResult>>();
    HashMap<String, FacetResult> authorsMap = new HashMap<String, FacetResult>();
    HashMap<String, FacetResult> languagesMap = new HashMap<String, FacetResult>();
    HashMap<String, FacetResult> subjectsMap = new HashMap<String, FacetResult>();
    HashMap<String, FacetResult> formatsMap = new HashMap<String, FacetResult>();
    HashMap<String, FacetResult> collectionMap = new HashMap<String, FacetResult>();

    facets.put(AUTHOR_MAP, authorsMap);
    facets.put(LANGUAGE_MAP, languagesMap);
    facets.put(SUBJECT_MAP, subjectsMap);
    facets.put(FORMAT_MAP, formatsMap);
    facets.put(COLLECTION_MAP, collectionMap);

    // Never process more hits than the search actually returned.
    int length = topDocs.totalHits;
    if (length <= numberOfHitsToProcess) {
        numberOfHitsToProcess = length;
    }

    for (int index = 0; index < numberOfHitsToProcess; index++) {

        Document doc = searcher.doc(topDocs.scoreDocs[index].doc, fieldSelector);
        String[] names = doc.getValues(DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES);
        String language = doc.get(DefaultInstitutionalItemIndexService.LANGUAGE);
        String[] subjects = doc.getValues(DefaultInstitutionalItemIndexService.KEY_WORDS);
        String[] formats = doc.getValues(DefaultInstitutionalItemIndexService.CONTENT_TYPES);
        String collection = doc.get(DefaultInstitutionalItemIndexService.COLLECTION_NAME);

        // NOTE(review): unlike the other facet maps, the collection map is not
        // capped at numberOfResultsToCollect — preserved as-is; confirm intent.
        if (collection != null) {
            addFacet(collectionMap, DefaultInstitutionalItemIndexService.COLLECTION_NAME, collection.trim());
        }

        collectFacetValues(authorsMap, DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES, names,
                numberOfResultsToCollect);

        if (language != null && languagesMap.size() < numberOfResultsToCollect) {
            addFacet(languagesMap, DefaultInstitutionalItemIndexService.LANGUAGE, language.trim());
        }

        collectFacetValues(subjectsMap, DefaultInstitutionalItemIndexService.KEY_WORDS, subjects,
                numberOfResultsToCollect);

        collectFacetValues(formatsMap, DefaultInstitutionalItemIndexService.CONTENT_TYPES, formats,
                numberOfResultsToCollect);
    }
    return facets;
}

/**
 * Registers a facet for the given value if one does not already exist
 * (initial count seeded to 1; counts are recomputed later).
 */
private static void addFacet(HashMap<String, FacetResult> map, String fieldName, String value) {
    FacetResult f = map.get(value);
    if (f == null) {
        f = new FacetResult(1l, fieldName, value);
        map.put(value, f);
    }
}

/**
 * Adds a facet for each value in order until the map reaches the collection limit.
 */
private static void collectFacetValues(HashMap<String, FacetResult> map, String fieldName, String[] values,
        int numberOfResultsToCollect) {
    int count = 0;
    while (count < values.length && map.size() < numberOfResultsToCollect) {
        addFacet(map, fieldName, values[count]);
        count = count + 1;
    }
}

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/** 
 * Process the possible facets and determine the number of hits for each facet across the main query.
 * @param possibleFacets - possible facets to show to the user
 * @param reader - lucene reader
 * @param mainQueryBits - bitset from the main query
 * @param facetResults - set of facet results
 * @param hits - number of hits
 * @param numberOfIdsToCollect - number of ids to collect and show to user
 * @param mainQueryString - main query 
 * 
 * @return - search helper
 * @throws ParseException
 * @throws IOException
 */
private FacetSearchHelper processPossibleFacets(HashMap<String, HashMap<String, FacetResult>> possibleFacets,
        IndexReader reader, OpenBitSetDISI mainQueryBits, HashMap<String, Collection<FacetResult>> facetResults,
        TopDocs hits, int numberOfIdsToCollect, int idsToCollectStartPosition, int numberOfFacetsToShow,
        String mainQueryString, IndexSearcher searcher) throws ParseException, IOException {
    FacetResultHitComparator facetResultHitComparator = new FacetResultHitComparator();

    // Each category follows the same recipe: count the hits for every candidate
    // facet against the main query, sort by hit count, keep the top N.
    // (The original repeated this stanza verbatim five times.)
    facetResults.put(AUTHOR_MAP, topFacets(possibleFacets.get(AUTHOR_MAP), reader, mainQueryBits, searcher,
            facetResultHitComparator, numberOfFacetsToShow));
    facetResults.put(SUBJECT_MAP, topFacets(possibleFacets.get(SUBJECT_MAP), reader, mainQueryBits, searcher,
            facetResultHitComparator, numberOfFacetsToShow));
    facetResults.put(LANGUAGE_MAP, topFacets(possibleFacets.get(LANGUAGE_MAP), reader, mainQueryBits, searcher,
            facetResultHitComparator, numberOfFacetsToShow));
    facetResults.put(FORMAT_MAP, topFacets(possibleFacets.get(FORMAT_MAP), reader, mainQueryBits, searcher,
            facetResultHitComparator, numberOfFacetsToShow));
    facetResults.put(COLLECTION_MAP, topFacets(possibleFacets.get(COLLECTION_MAP), reader, mainQueryBits,
            searcher, facetResultHitComparator, numberOfFacetsToShow));

    HashSet<Long> ids = new HashSet<Long>();

    // end position of id's to collect will be start position plus the number to collect
    int endPosition = idsToCollectStartPosition + numberOfIdsToCollect;

    // make sure that the end position is set up correctly
    if (hits.totalHits < endPosition) {
        endPosition = hits.totalHits;
    }

    // Only the ID field is needed for the result page being collected.
    String[] fieldsToLoad = { DefaultInstitutionalItemIndexService.ID };
    MapFieldSelector fieldSelector = new MapFieldSelector(fieldsToLoad);
    for (int index = idsToCollectStartPosition; index < endPosition; index++) {
        Document doc = searcher.doc(hits.scoreDocs[index].doc, fieldSelector);
        ids.add(NumericUtils.prefixCodedToLong(doc.get(DefaultInstitutionalItemIndexService.ID)));
    }
    FacetSearchHelper helper = new FacetSearchHelper(ids, hits.totalHits, facetResults, mainQueryString);
    return helper;
}

/**
 * Scores one facet category against the main query and returns at most
 * {@code numberOfFacetsToShow} facets, ordered by hit count (descending per the
 * comparator). Returns the scored list itself when it already fits the limit.
 */
private LinkedList<FacetResult> topFacets(HashMap<String, FacetResult> facetMap, IndexReader reader,
        OpenBitSetDISI mainQueryBits, IndexSearcher searcher, FacetResultHitComparator comparator,
        int numberOfFacetsToShow) throws ParseException, IOException {
    LinkedList<FacetResult> scored = new LinkedList<FacetResult>();
    scored.addAll(facetMap.values());
    processFacetCategory(scored, reader, mainQueryBits, searcher);
    Collections.sort(scored, comparator);

    if (scored.size() <= numberOfFacetsToShow) {
        return scored;
    }
    LinkedList<FacetResult> top = new LinkedList<FacetResult>();
    for (int index = 0; index < numberOfFacetsToShow; index++) {
        top.add(scored.get(index));
    }
    return top;
}

From source file:io.puntanegra.fhir.index.lucene.LuceneDocumentIterator.java

License:Apache License

/**
 * Fetches the next page of documents after the current cursor position and
 * appends them (with their score docs) to the internal buffer.
 */
private void fetch() {
    try {
        IndexSearcher searcher = manager.acquire();

        try {
            TimeCounter time = TimeCounter.create().start();

            // Search for top documents, optionally sorted.
            TopDocs topDocs = null;
            if (this.sort != null) {
                sort = sort.rewrite(searcher);
                topDocs = searcher.searchAfter(after, query, page, sort);
            } else {
                topDocs = searcher.searchAfter(after, query, page);
            }
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            // A completely full page implies more results may follow.
            mayHaveMore = scoreDocs.length == page;

            // Collect the documents and advance the pagination cursor.
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc, fields);
                documents.add(Pair.create(document, scoreDoc));
                after = scoreDoc;
            }

            logger.debug("Get page with {} documents in {}", scoreDocs.length, time.stop());

        } finally {
            // Always hand the searcher back to the manager, even on failure.
            manager.release(searcher);
        }

    } catch (Exception e) {
        // The cause travels inside FhirIndexException; printStackTrace() here
        // bypassed the logging framework and duplicated the trace, so it was removed.
        throw new FhirIndexException(e, "Error searching in with %s and %s", query, sort);
    }
}

From source file:io.puntanegra.fhir.index.lucene.LuceneRAMIndex.java

License:Apache License

/**
 * Finds the top {@code count} hits for {@code query} and sorting the hits
 * by {@code sort}./* w  ww.ja  va 2  s  .  com*/
 *
 * @param query
 *            the {@link Query} to search for
 * @param sort
 *            the {@link Sort} to be applied
 * @param count
 *            the max number of results to be collected
 * @param fields
 *            the names of the fields to be loaded
 * @return the found documents
 */
/**
 * Finds the top {@code count} hits for {@code query}, sorting the hits by {@code sort}.
 *
 * @param query  the {@link Query} to search for
 * @param sort   the {@link Sort} to be applied
 * @param count  the max number of results to be collected
 * @param fields the names of the fields to be loaded
 * @return the found documents
 */
public List<Document> search(Query query, Sort sort, Integer count, Set<String> fields) {
    try {
        // Commit pending writes so the new reader sees them.
        indexWriter.commit();
        // try-with-resources: the original leaked the reader whenever search()
        // or sort.rewrite() threw, because close() only ran on the success path.
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            sort = sort.rewrite(searcher);
            TopDocs topDocs = searcher.search(query, count, sort);
            List<Document> documents = new LinkedList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                documents.add(searcher.doc(scoreDoc.doc, fields));
            }
            return documents;
        }
    } catch (IOException e) {
        throw new FhirIndexException(e, "Error while searching");
    }
}

From source file:it.pronetics.madstore.repository.index.impl.LuceneSearcher.java

License:Apache License

/**
 * Runs the query and returns a page of search results in score order.
 *
 * @param query  the Lucene query to execute
 * @param offset index of the first hit to return
 * @param max    page size; 0 means the manager's default
 * @return a paging list containing results [offset, offset + max) that have
 *         both a collection key and an entry key
 * @throws Exception if the search fails
 */
private PagingList<SearchResult> doSearch(Query query, int offset, int max) throws Exception {
    if (max == 0) {
        max = LuceneIndexManager.DEFAULT_MAX_SEARCH_RESULTS;
    }
    int limit = offset + max;
    IndexSearcher searcher = threadLocalSeacher.get();
    TopFieldDocs topFieldDocs = searcher.search(query, null, limit, new Sort(SortField.FIELD_SCORE));
    PagingList<SearchResult> entryItems = new PagingList<SearchResult>(new ArrayList<SearchResult>(), offset,
            max, topFieldDocs.totalHits);
    // Hoisted out of the loop: the selector is identical for every hit.
    MapFieldSelector fieldSelector = new MapFieldSelector(
            new String[] { LuceneIndexManager.INDEX_COLLECTION_KEY, LuceneIndexManager.INDEX_ENTRY_KEY });
    for (int i = offset; i < limit && i < topFieldDocs.totalHits; i++) {
        Document doc = searcher.doc(topFieldDocs.scoreDocs[i].doc, fieldSelector);
        String collectionKey = doc.get(LuceneIndexManager.INDEX_COLLECTION_KEY);
        String entryKey = doc.get(LuceneIndexManager.INDEX_ENTRY_KEY);
        if (collectionKey != null && entryKey != null) {
            entryItems.add(new SearchResult(collectionKey, entryKey));
        } else {
            LOG.warn("Found an entry with missing collection ({}) or entry ({}) key.", collectionKey, entryKey);
        }
    }
    return entryItems;
}