Usage examples for org.apache.lucene.search.IndexSearcher#doc
public Document doc(int docID, Set<String> fieldsToLoad) throws IOException
.getIndexReader().document(docID, fieldsToLoad)
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
/**
 * Looks up the single Lucene document whose "docId" field equals the given id.
 *
 * @param docId        value of the indexed "docId" term to look up; expected to be unique
 * @param attachment   true to search the attachment (blob) index, false for the main index.
 *                     NOTE(review): boxed Boolean is auto-unboxed below — a null argument
 *                     would throw NullPointerException; confirm callers never pass null.
 * @param fieldsToLoad stored fields to load, or null to load all stored fields
 * @return the matching document, or null if no document with this id was found
 * @throws IOException if the index cannot be opened or read
 */
private org.apache.lucene.document.Document getLDoc(String docId, Boolean attachment, Set<String> fieldsToLoad)
        throws IOException {
    IndexSearcher searcher = null;
    if (!attachment) {
        // lazily open the main-index searcher and cache it in the field.
        // NOTE(review): this lazy init is not synchronized — not thread-safe if
        // getLDoc can be called concurrently; confirm single-threaded use.
        if (isearcher == null) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            isearcher = new IndexSearcher(ireader);
        }
        searcher = isearcher;
    } else {
        // same lazy init for the attachment (blob) index searcher
        if (isearcher_blob == null) {
            DirectoryReader ireader = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader);
        }
        searcher = isearcher_blob;
    }
    // exact-term lookup; ask for only 1 hit since docId is expected to be unique
    TermQuery q = new TermQuery(new Term("docId", docId));
    TopDocs td = searcher.search(q, 1); // there must be only 1 doc with this id anyway
    Util.softAssert(td.totalHits <= 1, "docId = " + docId + " is not unique. Found: " + td.totalHits + " hits!");
    ScoreDoc[] sd = td.scoreDocs;
    if (sd.length != 1) {
        // something went wrong... report it and ignore this doc
        Util.warnIf(true, "lookup failed for id " + docId);
        return null;
    }
    // load only the requested stored fields when a selection is given
    if (fieldsToLoad != null)
        return searcher.doc(sd[0].doc, fieldsToLoad);
    else
        return searcher.doc(sd[0].doc);
}
From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java
License:Apache License
/** * This determines the possible facets for each of the categories. For example - possible authors * for the display. This does not care about counts later on counts will be important. * /*from www .j a v a2 s . c o m*/ * @param topDocs - top doucment hits found * @param numberOfHitsToProcess * @return * @throws CorruptIndexException * @throws IOException */ private HashMap<String, HashMap<String, FacetResult>> generateFacetSearches(TopDocs topDocs, int numberOfHitsToProcess, int numberOfResultsToCollect, IndexSearcher searcher) throws CorruptIndexException, IOException { String[] fieldsToLoad = { DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES, DefaultInstitutionalItemIndexService.LANGUAGE, DefaultInstitutionalItemIndexService.KEY_WORDS, DefaultInstitutionalItemIndexService.CONTENT_TYPES, DefaultInstitutionalItemIndexService.COLLECTION_NAME, DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES }; MapFieldSelector fieldSelector = new MapFieldSelector(fieldsToLoad); HashMap<String, HashMap<String, FacetResult>> facets = new HashMap<String, HashMap<String, FacetResult>>(); HashMap<String, FacetResult> authorsMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> languagesMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> subjectsMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> formatsMap = new HashMap<String, FacetResult>(); HashMap<String, FacetResult> collectionMap = new HashMap<String, FacetResult>(); facets.put(AUTHOR_MAP, authorsMap); facets.put(LANGUAGE_MAP, languagesMap); facets.put(SUBJECT_MAP, subjectsMap); facets.put(FORMAT_MAP, formatsMap); facets.put(COLLECTION_MAP, collectionMap); int length = topDocs.totalHits; if (length <= numberOfHitsToProcess) { numberOfHitsToProcess = length; } for (int index = 0; index < numberOfHitsToProcess; index++) { Document doc = searcher.doc(topDocs.scoreDocs[index].doc, fieldSelector); String[] names = 
doc.getValues(DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES); String language = doc.get(DefaultInstitutionalItemIndexService.LANGUAGE); String[] subjects = doc.getValues(DefaultInstitutionalItemIndexService.KEY_WORDS); String[] formats = doc.getValues(DefaultInstitutionalItemIndexService.CONTENT_TYPES); String collection = doc.get(DefaultInstitutionalItemIndexService.COLLECTION_NAME); if (collection != null) { collection = collection.trim(); FacetResult f = collectionMap.get(collection); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.COLLECTION_NAME, collection); collectionMap.put(collection, f); } } if (authorsMap.size() < numberOfResultsToCollect) { int count = 0; while (count < names.length && authorsMap.size() < numberOfResultsToCollect) { FacetResult f = authorsMap.get(names[count]); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.CONTRIBUTOR_NAMES, names[count]); authorsMap.put(names[count], f); } count = count + 1; } } if (language != null && languagesMap.size() < numberOfResultsToCollect) { language = language.trim(); FacetResult f = languagesMap.get(language); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.LANGUAGE, language); languagesMap.put(language, f); } } if (subjectsMap.size() < numberOfResultsToCollect) { int count = 0; while (count < subjects.length && subjectsMap.size() < numberOfResultsToCollect) { FacetResult f = subjectsMap.get(subjects[count]); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.KEY_WORDS, subjects[count]); subjectsMap.put(subjects[count], f); } count = count + 1; } } if (formatsMap.size() < numberOfResultsToCollect) { int count = 0; while (count < formats.length && formatsMap.size() < numberOfResultsToCollect) { FacetResult f = formatsMap.get(formats[count]); if (f == null) { f = new FacetResult(1l, DefaultInstitutionalItemIndexService.CONTENT_TYPES, formats[count]); formatsMap.put(formats[count], 
f); } count = count + 1; } } doc = null; } return facets; }
From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java
License:Apache License
/** * Process the possible facets and determine the number of hits for each facet accross the main query. * /* ww w . j a v a 2s .co m*/ * @param possibleFacets - possible facets to show to the user * @param reader - lucene reader * @param mainQueryBits - bitset from the main query * @param facetResults - set of facet results * @param hits - number of hits * @param numberOfIdsToCollect - number of ids to collect and show to user * @param mainQueryString - main query * * @return - search helper * @throws ParseException * @throws IOException */ private FacetSearchHelper processPossibleFacets(HashMap<String, HashMap<String, FacetResult>> possibleFacets, IndexReader reader, OpenBitSetDISI mainQueryBits, HashMap<String, Collection<FacetResult>> facetResults, TopDocs hits, int numberOfIdsToCollect, int idsToCollectStartPosition, int numberOfFacetsToShow, String mainQueryString, IndexSearcher searcher) throws ParseException, IOException { FacetResultHitComparator facetResultHitComparator = new FacetResultHitComparator(); // get the authors and create a facet for each author // determine the number of hits the author has in the main query HashMap<String, FacetResult> authorFacetMap = possibleFacets.get(AUTHOR_MAP); LinkedList<FacetResult> authorFacets = new LinkedList<FacetResult>(); authorFacets.addAll(authorFacetMap.values()); processFacetCategory(authorFacets, reader, mainQueryBits, searcher); Collections.sort(authorFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalAuthorFacets; if (authorFacets.size() <= numberOfFacetsToShow) { finalAuthorFacets = authorFacets; } else { finalAuthorFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalAuthorFacets.add(authorFacets.get(index)); } } facetResults.put(AUTHOR_MAP, finalAuthorFacets); // get the subjects and create a facet for each subject // determine the number of hits the subject has in the main query HashMap<String, FacetResult> 
subjectFacetMap = possibleFacets.get(SUBJECT_MAP); LinkedList<FacetResult> subjectFacets = new LinkedList<FacetResult>(); subjectFacets.addAll(subjectFacetMap.values()); processFacetCategory(subjectFacets, reader, mainQueryBits, searcher); Collections.sort(subjectFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalSubjectFacets; if (subjectFacets.size() <= numberOfFacetsToShow) { finalSubjectFacets = subjectFacets; } else { finalSubjectFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalSubjectFacets.add(subjectFacets.get(index)); } } facetResults.put(SUBJECT_MAP, finalSubjectFacets); // get the language and create a facet for each language // determine the number of hits the language has in the main query HashMap<String, FacetResult> languageFacetMap = possibleFacets.get(LANGUAGE_MAP); LinkedList<FacetResult> languageFacets = new LinkedList<FacetResult>(); languageFacets.addAll(languageFacetMap.values()); processFacetCategory(languageFacets, reader, mainQueryBits, searcher); Collections.sort(languageFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalLanguageFacets; if (languageFacets.size() <= numberOfFacetsToShow) { finalLanguageFacets = languageFacets; } else { finalLanguageFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalLanguageFacets.add(languageFacets.get(index)); } } facetResults.put(LANGUAGE_MAP, finalLanguageFacets); // get the format and create a facet for each format // determine the number of hits the format has in the main query HashMap<String, FacetResult> formatFacetMap = possibleFacets.get(FORMAT_MAP); LinkedList<FacetResult> formatFacets = new LinkedList<FacetResult>(); formatFacets.addAll(formatFacetMap.values()); processFacetCategory(formatFacets, reader, mainQueryBits, searcher); Collections.sort(formatFacets, facetResultHitComparator); // final holder of 
facets LinkedList<FacetResult> finalFormatFacets; if (formatFacets.size() <= numberOfFacetsToShow) { finalFormatFacets = formatFacets; } else { finalFormatFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalFormatFacets.add(formatFacets.get(index)); } } facetResults.put(FORMAT_MAP, finalFormatFacets); // get the format and create a facet for each format // determine the number of hits the format has in the main query HashMap<String, FacetResult> collectionFacetMap = possibleFacets.get(COLLECTION_MAP); LinkedList<FacetResult> collectionFacets = new LinkedList<FacetResult>(); collectionFacets.addAll(collectionFacetMap.values()); processFacetCategory(collectionFacets, reader, mainQueryBits, searcher); Collections.sort(collectionFacets, facetResultHitComparator); // final holder of facets LinkedList<FacetResult> finalCollectionFacets; if (collectionFacets.size() <= numberOfFacetsToShow) { finalCollectionFacets = collectionFacets; } else { finalCollectionFacets = new LinkedList<FacetResult>(); for (int index = 0; index < numberOfFacetsToShow; index++) { finalCollectionFacets.add(collectionFacets.get(index)); } } facetResults.put(COLLECTION_MAP, finalCollectionFacets); HashSet<Long> ids = new HashSet<Long>(); // end position of id's to collect will be start position plus the number to collect int endPosition = idsToCollectStartPosition + numberOfIdsToCollect; // make sure that the end position is set up correctly if (hits.totalHits < endPosition) { endPosition = hits.totalHits; } String[] fieldsToLoad = { DefaultInstitutionalItemIndexService.ID }; MapFieldSelector fieldSelector = new MapFieldSelector(fieldsToLoad); for (int index = idsToCollectStartPosition; index < endPosition; index++) { Document doc = searcher.doc(hits.scoreDocs[index].doc, fieldSelector); ids.add(NumericUtils.prefixCodedToLong(doc.get(DefaultInstitutionalItemIndexService.ID))); } FacetSearchHelper helper = new FacetSearchHelper(ids, 
hits.totalHits, facetResults, mainQueryString); return helper; }
From source file:io.puntanegra.fhir.index.lucene.LuceneDocumentIterator.java
License:Apache License
private void fetch() { try {//from w w w. j ava 2s .c o m IndexSearcher searcher = manager.acquire(); try { TimeCounter time = TimeCounter.create().start(); // Search for top documents TopDocs topDocs = null; if (this.sort != null) { sort = sort.rewrite(searcher); topDocs = searcher.searchAfter(after, query, page, sort); } else { topDocs = searcher.searchAfter(after, query, page); } ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Check inf mayHaveMore mayHaveMore = scoreDocs.length == page; // Collect the documents from query result for (ScoreDoc scoreDoc : scoreDocs) { Document document = searcher.doc(scoreDoc.doc, fields); documents.add(Pair.create(document, scoreDoc)); after = scoreDoc; } logger.debug("Get page with {} documents in {}", scoreDocs.length, time.stop()); } finally { manager.release(searcher); } } catch (Exception e) { e.printStackTrace(); throw new FhirIndexException(e, "Error searching in with %s and %s", query, sort); } }
From source file:io.puntanegra.fhir.index.lucene.LuceneRAMIndex.java
License:Apache License
/** * Finds the top {@code count} hits for {@code query} and sorting the hits * by {@code sort}./* w ww.ja va 2 s . com*/ * * @param query * the {@link Query} to search for * @param sort * the {@link Sort} to be applied * @param count * the max number of results to be collected * @param fields * the names of the fields to be loaded * @return the found documents */ public List<Document> search(Query query, Sort sort, Integer count, Set<String> fields) { try { indexWriter.commit(); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); sort = sort.rewrite(searcher); TopDocs topDocs = searcher.search(query, count, sort); ScoreDoc[] scoreDocs = topDocs.scoreDocs; List<Document> documents = new LinkedList<>(); for (ScoreDoc scoreDoc : scoreDocs) { Document document = searcher.doc(scoreDoc.doc, fields); documents.add(document); } searcher.getIndexReader().close(); return documents; } catch (IOException e) { throw new FhirIndexException(e, "Error while searching"); } }
From source file:it.pronetics.madstore.repository.index.impl.LuceneSearcher.java
License:Apache License
/**
 * Runs the query against the thread-local searcher and returns one page of results.
 * A {@code max} of 0 falls back to the default page size. Hits missing either the
 * collection key or the entry key are logged and skipped.
 *
 * @param query  the query to execute
 * @param offset index of the first hit to return
 * @param max    page size (0 means the default maximum)
 * @return paging list of search results for the requested window
 * @throws Exception on any search failure
 */
private PagingList<SearchResult> doSearch(Query query, int offset, int max) throws Exception {
    if (max == 0) {
        max = LuceneIndexManager.DEFAULT_MAX_SEARCH_RESULTS;
    }
    int limit = offset + max;
    IndexSearcher searcher = threadLocalSeacher.get();
    TopFieldDocs topFieldDocs = searcher.search(query, null, limit, new Sort(SortField.FIELD_SCORE));
    PagingList<SearchResult> results = new PagingList<SearchResult>(new ArrayList<SearchResult>(), offset,
            max, topFieldDocs.totalHits);
    // only the two key fields are needed, so load just those
    MapFieldSelector keySelector = new MapFieldSelector(
            new String[] { LuceneIndexManager.INDEX_COLLECTION_KEY, LuceneIndexManager.INDEX_ENTRY_KEY });
    int end = Math.min(limit, topFieldDocs.totalHits);
    for (int hit = offset; hit < end; hit++) {
        Document document = searcher.doc(topFieldDocs.scoreDocs[hit].doc, keySelector);
        String collectionKey = document.get(LuceneIndexManager.INDEX_COLLECTION_KEY);
        String entryKey = document.get(LuceneIndexManager.INDEX_ENTRY_KEY);
        if (collectionKey == null || entryKey == null) {
            LOG.warn("Found an entry with missing collection ({}) or entry ({}) key.", collectionKey,
                    entryKey);
        } else {
            results.add(new SearchResult(collectionKey, entryKey));
        }
    }
    return results;
}