List of usage examples for the org.apache.lucene.index.IndexReader document method
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException
From source file:org.fao.geonet.kernel.search.LuceneSearcher.java
License:Open Source License
/**
 * Counts, for every summary key, how often each stored field value occurs in the given hits.
 *
 * <p>Only fields that have an entry in {@code summaryMaps} are loaded from the index. A hit
 * whose document cannot be read is logged and skipped, so it does not contribute to the counts.
 *
 * @param elSummary   element that receives the {@code hitsusedforsummary} attribute
 *                    (set to the number of hits passed in)
 * @param reader      index reader the documents are loaded from
 * @param sdocs       hits to summarize
 * @param summaryMaps per-field value counters, keyed by field name; updated in place
 * @return the same {@code summaryMaps} instance that was passed in
 */
private static Map<String, Map<String, Integer>> buildSummaryMaps(Element elSummary, IndexReader reader,
        ScoreDoc[] sdocs, final Map<String, Map<String, Integer>> summaryMaps) {
    elSummary.setAttribute("hitsusedforsummary", sdocs.length + "");

    // Load only the fields we are going to count.
    FieldSelector keySelector = new FieldSelector() {
        public final FieldSelectorResult accept(String name) {
            if (summaryMaps.get(name) != null) {
                return FieldSelectorResult.LOAD;
            }
            return FieldSelectorResult.NO_LOAD;
        }
    };

    for (ScoreDoc sdoc : sdocs) {
        Document doc;
        try {
            doc = reader.document(sdoc.doc, keySelector);
        } catch (Exception e) {
            Log.error(Geonet.SEARCH_ENGINE, e.getMessage() + " Caused Failure to get document " + sdoc.doc, e);
            // Without a document there is nothing to count; previously this fell through
            // and dereferenced a null document.
            continue;
        }

        for (String key : summaryMaps.keySet()) {
            Map<String, Integer> summary = summaryMaps.get(key);
            String[] hits = doc.getValues(key);
            if (hits == null) {
                continue;
            }
            for (String info : hits) {
                Integer catCount = summary.get(info);
                summary.put(info, catCount == null ? 1 : catCount + 1);
            }
        }
    }

    return summaryMaps;
}
From source file:org.fao.geonet.kernel.search.LuceneSearcher.java
License:Open Source License
/** * TODO javadoc.//from w ww . j a va 2 s . c o m * * @param webappName * @param priorityLang * @param idField * @param id * @param fieldnames * @return * @throws Exception */ private static Map<String, String> getMetadataFromIndex(String webappName, String priorityLang, String idField, String id, List<String> fieldnames) throws Exception { MapFieldSelector selector = new MapFieldSelector(fieldnames); IndexReader reader; LuceneIndexReaderFactory factory = null; SearchManager searchmanager = null; ServiceContext context = ServiceContext.get(); if (context != null) { GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); searchmanager = gc.getSearchmanager(); reader = searchmanager.getIndexReader(priorityLang); } else { File luceneDir = new File(System.getProperty(webappName + ".lucene.dir", webappName), "nonspatial"); factory = new LuceneIndexReaderFactory(luceneDir); reader = factory.getReader(priorityLang); } Searcher searcher = new IndexSearcher(reader); Map<String, String> values = new HashMap<String, String>(); try { TermQuery query = new TermQuery(new Term(idField, id)); SettingInfo settingInfo = _sm.get_settingInfo(); boolean sortRequestedLanguageOnTop = settingInfo.getRequestedLanguageOnTop(); if (Log.isDebugEnabled(Geonet.LUCENE)) Log.debug(Geonet.LUCENE, "sortRequestedLanguageOnTop: " + sortRequestedLanguageOnTop); Sort sort = LuceneSearcher.makeSort(Collections.<Pair<String, Boolean>>emptyList(), priorityLang, sortRequestedLanguageOnTop); Filter filter = NoFilterFilter.instance(); TopDocs tdocs = searcher.search(query, filter, 1, sort); for (ScoreDoc sdoc : tdocs.scoreDocs) { Document doc = reader.document(sdoc.doc, selector); for (String fieldname : fieldnames) { values.put(fieldname, doc.get(fieldname)); } } } catch (CorruptIndexException e) { // TODO: handle exception Log.error(Geonet.LUCENE, e.getMessage()); } catch (IOException e) { // TODO: handle exception Log.error(Geonet.LUCENE, e.getMessage()); } finally { try { 
searcher.close(); } finally { if (factory != null) { factory.close(); } else if (searchmanager != null) { searchmanager.releaseIndexReader(reader); } } } return values; }
From source file:org.fao.geonet.kernel.search.SearchManager.java
License:Open Source License
/** * TODO javadoc.//from w w w.java 2 s . co m * * @return * @throws Exception */ public Set<Integer> getDocsWithXLinks() throws Exception { IndexReader reader = getIndexReader(null); try { FieldSelector idXLinkSelector = new FieldSelector() { public final FieldSelectorResult accept(String name) { if (name.equals("_id") || name.equals("_hasxlinks")) return FieldSelectorResult.LOAD; else return FieldSelectorResult.NO_LOAD; } }; Set<Integer> docs = new LinkedHashSet<Integer>(); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.isDeleted(i)) continue; // FIXME: strange lucene hack: sometimes it tries to load a deleted document Document doc = reader.document(i, idXLinkSelector); String id = doc.get("_id"); String hasxlinks = doc.get("_hasxlinks"); if (Log.isDebugEnabled(Geonet.INDEX_ENGINE)) Log.debug(Geonet.INDEX_ENGINE, "Got id " + id + " : '" + hasxlinks + "'"); if (id == null) { Log.error(Geonet.INDEX_ENGINE, "Document with no _id field skipped! Document is " + doc); continue; } if (hasxlinks.trim().equals("1")) { docs.add(new Integer(id)); } } return docs; } finally { releaseIndexReader(reader); } }
From source file:org.fao.geonet.kernel.search.SearchManager.java
License:Open Source License
/** * TODO javadoc.//w w w. j a v a 2 s . c om * * @return * @throws Exception */ public Map<String, String> getDocsChangeDate() throws Exception { IndexReader reader = getIndexReader(null); try { FieldSelector idChangeDateSelector = new FieldSelector() { public final FieldSelectorResult accept(String name) { if (name.equals("_id") || name.equals("_changeDate")) return FieldSelectorResult.LOAD; else return FieldSelectorResult.NO_LOAD; } }; int capacity = (int) (reader.maxDoc() / 0.75) + 1; Map<String, String> docs = new HashMap<String, String>(capacity); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.isDeleted(i)) continue; // FIXME: strange lucene hack: sometimes it tries to load a deleted document Document doc = reader.document(i, idChangeDateSelector); String id = doc.get("_id"); if (id == null) { Log.error(Geonet.INDEX_ENGINE, "Document with no _id field skipped! Document is " + doc); continue; } docs.put(id, doc.get("_changeDate")); } return docs; } finally { releaseIndexReader(reader); } }
From source file:org.fao.geonet.kernel.search.spatial.FullScanFilter.java
License:Open Source License
public BitSet bits(final IndexReader reader) throws IOException { final BitSet bits = new BitSet(reader.maxDoc()); final Set<String> matches = loadMatches(); new IndexSearcher(reader).search(_query, new Collector() { private int docBase; // ignore scorer public void setScorer(Scorer scorer) { }/*from w w w .j a v a 2 s .c o m*/ // accept docs out of order (for a BitSet it doesn't matter) public boolean acceptsDocsOutOfOrder() { return true; } public final void collect(int doc) { Document document; try { document = reader.document(doc, _selector); if (matches.contains(document.get("_id"))) { bits.set(docBase + doc); } } catch (Exception e) { throw new RuntimeException(e); } } public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } }); return bits; }
From source file:org.fao.geonet.kernel.search.spatial.SpatialFilter.java
License:Open Source License
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); final Map<String, FeatureId> unrefinedSpatialMatches = unrefinedSpatialMatches(); final Set<FeatureId> matches = new HashSet<FeatureId>(); final Multimap<FeatureId, Integer> docIndexLookup = HashMultimap.create(); if (unrefinedSpatialMatches.isEmpty()) return bits; new IndexSearcher(reader).search(_query, new Collector() { private int docBase; private Document document; // ignore scorer public void setScorer(Scorer scorer) { }/*ww w . j ava 2s .c o m*/ // accept docs out of order (for a BitSet it doesn't matter) public boolean acceptsDocsOutOfOrder() { return true; } public void collect(int doc) { doc = doc + docBase; try { document = reader.document(doc, _selector); String key = document.get("_id"); FeatureId featureId = unrefinedSpatialMatches.get(key); if (featureId != null) { matches.add(featureId); docIndexLookup.put(featureId, doc + docBase); } } catch (Exception e) { throw new RuntimeException(e); } } public void setNextReader(IndexReader reader, int docBase) { this.docBase = docBase; } }); if (matches.isEmpty()) { return bits; } else { return applySpatialFilter(matches, docIndexLookup, bits); } }
From source file:org.gridkit.coherence.search.lucene.LuceneInMemoryIndex.java
License:Apache License
public synchronized void applyIndex(Query query, final Set<Object> keySet, final IndexInvocationContext context) { if (searcher == null) { // index is empty keySet.clear();//from w w w . ja v a 2 s . c om return; } final Set<Object> retained = new HashSet<Object>(); try { searcher.search(query, new Collector() { IndexReader reader; @Override public void setScorer(Scorer scorer) throws IOException { // ignore } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { this.reader = reader; } @Override public void collect(int doc) throws IOException { Document document = reader.document(doc, DOCUMENT_KEY_SELECTOR); String key64 = document.get(LuceneInMemoryIndex.DOCUMENT_KEY); Binary bin = new Binary(fromBase64(key64)); Object key = context.ensureFilterCompatibleKey(bin); if (keySet.contains(key)) { retained.add(key); } } @Override public boolean acceptsDocsOutOfOrder() { return true; } }); } catch (IOException e) { // should never happen with RAMDirectory throw new RuntimeException(e); } keySet.retainAll(retained); }
From source file:org.hippoecm.repository.FacetedNavigationEngineImpl.java
License:Apache License
/**
 * Evaluates a faceted-navigation view and returns either a hit count or the matching node ids.
 *
 * <p>Doc id sets for the facets query, the facet range query, the inherited filter, the
 * scopes of {@code initialQuery} and (when present) the authorization set are added to a
 * {@code SetDocIdSetBuilder}. NOTE(review): the builder presumably intersects the sets it
 * is given - confirm against {@code SetDocIdSetBuilder}.
 *
 * <p>When {@code resultset} is non-null it must hold at most one facet (otherwise an error is
 * logged and a zero result is returned). For that facet the facet value counts are looked up
 * in {@code facetValueCountCache} or computed and cached, then copied into the facet's entry
 * of {@code resultset}; the returned count is {@code numHits} for a fixed drill path and the
 * cardinality of the matching docs otherwise.
 *
 * <p>When {@code resultset} is null and no result is requested, only the cardinality of the
 * matching docs is returned. Otherwise a search is executed (sorted by the free-search sort,
 * the requested order-by list, or score) for {@code offset + limit} hits, and the UUIDs of
 * the hits from {@code offset} onwards are returned in hit order.
 *
 * <p>{@code IllegalNameException} and {@code IOException} are logged and lead to a zero
 * result. The index reader is always handed back through {@code Util.closeOrRelease}.
 *
 * @param queryName       not referenced in this method body
 * @param initialQuery    supplies the scopes and facet filters; may be null
 * @param contextImpl     supplies the authorization query / doc id set and is passed to
 *                        {@code openQuery.getLuceneQueryAndSort}
 * @param facetsQueryList key/value pairs used to build the facets query
 * @param rangeQuery      facet ranges used to build the facet range query
 * @param openQuery       optional free-text query; may be null
 * @param resultset       facet name to value-count map to populate, or null to search for hits
 * @param inheritedFilter key/value pairs used to build the inherited filter query
 * @param hitsRequested   controls drill path handling, whether results are requested,
 *                        offset, limit and ordering
 * @return the computed result; {@code new ResultImpl(0, null)} on the error paths above
 */
public Result doView(String queryName, QueryImpl initialQuery, ContextImpl contextImpl, List<KeyValue<String, String>> facetsQueryList, List<FacetRange> rangeQuery, QueryImpl openQuery, Map<String, Map<String, Count>> resultset, Map<String, String> inheritedFilter, HitsRequested hitsRequested) throws UnsupportedOperationException, IllegalArgumentException { NamespaceMappings nsMappings = getNamespaceMappings(); IndexReader indexReader = null; try { indexReader = getIndexReader(false); IndexSearcher searcher = new IndexSearcher(indexReader); SetDocIdSetBuilder matchingDocsSetBuilder = new SetDocIdSetBuilder(); BooleanQuery facetsQuery = new FacetsQuery(facetsQueryList, nsMappings).getQuery(); matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(facetsQuery, indexReader)); BooleanQuery facetRangeQuery = new FacetRangeQuery(rangeQuery, nsMappings, this).getQuery(); matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(facetRangeQuery, indexReader)); BooleanQuery inheritedFilterQuery = new InheritedFilterQuery(inheritedFilter, nsMappings).getQuery(); matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(inheritedFilterQuery, indexReader)); org.apache.lucene.search.Query initialLuceneQuery = null; if (initialQuery != null && initialQuery.scopes != null && initialQuery.scopes.length > 0) { if (initialQuery.scopes.length == 1) { initialLuceneQuery = new TermQuery( new Term(ServicingFieldNames.HIPPO_PATH, initialQuery.scopes[0])); } else { initialLuceneQuery = new BooleanQuery(true); for (String scope : initialQuery.scopes) { ((BooleanQuery) initialLuceneQuery) .add(new TermQuery(new Term(ServicingFieldNames.HIPPO_PATH, scope)), Occur.SHOULD); } } } matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(initialLuceneQuery, indexReader)); FacetFiltersQuery facetFiltersQuery = null; if (initialQuery != null && initialQuery.facetFilters != null) { facetFiltersQuery = new FacetFiltersQuery(initialQuery.facetFilters, nsMappings, 
this.getTextAnalyzer(), this.getSynonymProvider()); } final BooleanQuery authorizationQuery = contextImpl.getAuthorizationQuery(); if (authorizationQuery != null) { final DocIdSet authorisationIdSet = contextImpl.getAuthorisationIdSet(indexReader); if (authorisationIdSet != null) { matchingDocsSetBuilder.add(authorisationIdSet); } } if (resultset != null) { // If there are more than one facet in the 'resultset' we return an empty result as this is not allowed if (resultset.size() > 1) { log.error("The resultset cannot contain multiple facets"); return new ResultImpl(0, null); } int cardinality = 0; for (String namespacedFacet : resultset.keySet()) { // Not a search involving scoring, thus compute bitsets for facetFiltersQuery & freeSearchInjectedSort if (facetFiltersQuery != null) { if (facetFiltersQuery.isPlainLuceneQuery()) { matchingDocsSetBuilder .add(filterDocIdSetPlainLuceneQuery(facetFiltersQuery.getQuery(), indexReader)); } else { matchingDocsSetBuilder .add(filterDocIdSetJackRabbitQuery(facetFiltersQuery.getQuery(), indexReader)); } } if (openQuery != null) { QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl); // open query is always a jackrabbit query matchingDocsSetBuilder.add(filterDocIdSetJackRabbitQuery(queryAndSort.query, indexReader)); } OpenBitSet matchingDocs = matchingDocsSetBuilder.toBitSet(); cardinality = (int) matchingDocs.cardinality(); /* * Nodes not having this facet, still should be counted if they are a hit * in the query without this facet. Therefor, first get the count query without * FacetPropExistsQuery. 
*/ int numHits = 0; if (hitsRequested.isFixedDrillPath()) { // only in the case of the fixed drillpath we use the count where the facet does not need to exist numHits = (int) matchingDocs.cardinality(); } ParsedFacet parsedFacet; try { parsedFacet = ParsedFacet.getInstance(namespacedFacet); } catch (Exception e) { log.error("Error parsing facet: ", e); return new ResultImpl(0, null); } String propertyName = ServicingNameFormat.getInteralPropertyPathName(nsMappings, parsedFacet.getNamespacedProperty()); /* * facetPropExists: the node must have the property as facet */ matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery( new FacetPropExistsQuery(propertyName).getQuery(), indexReader)); matchingDocs = matchingDocsSetBuilder.toBitSet(); cardinality = (int) matchingDocs.cardinality(); // this method populates the facetValueCountMap for the current facet // index reader is instance of JackrabbitIndexReader : we need the wrapped multi-index reader as // cache key : since during deletes only, the backing index reader can stay the same, we // also need to use numDocs to be sure we get the right cached values Object[] keyObjects = { matchingDocs, propertyName, parsedFacet, indexReader.getCoreCacheKey(), indexReader.numDocs() }; FVCKey fvcKey = new FVCKey(keyObjects); Map<String, Count> facetValueCountMap = facetValueCountCache.getIfPresent(fvcKey); if (facetValueCountMap == null) { facetValueCountMap = new HashMap<String, Count>(); populateFacetValueCountMap(propertyName, parsedFacet, facetValueCountMap, matchingDocs, indexReader); facetValueCountCache.put(fvcKey, facetValueCountMap); log.debug("Caching new facet value count map"); } else { log.debug("Reusing previously cached facet value count map"); } Map<String, Count> resultFacetValueCountMap = resultset.get(namespacedFacet); resultFacetValueCountMap.putAll(facetValueCountMap); // set the numHits value if (hitsRequested.isFixedDrillPath()) { return new ResultImpl(numHits, null); } } return new 
ResultImpl(cardinality, null); } else { // resultset is null, so search for HippoNodeType.HIPPO_RESULTSET if (!hitsRequested.isResultRequested()) { // No search with SCORING involved, this everything can be done with BitSet's if (facetFiltersQuery != null && facetFiltersQuery.getQuery().clauses().size() > 0) { matchingDocsSetBuilder .add(filterDocIdSetPlainLuceneQuery(facetFiltersQuery.getQuery(), indexReader)); } if (openQuery != null) { QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl); matchingDocsSetBuilder.add(filterDocIdSetJackRabbitQuery(queryAndSort.query, indexReader)); } int size = (int) matchingDocsSetBuilder.toBitSet().cardinality(); return new ResultImpl(size, null); } else { BooleanQuery searchQuery = new BooleanQuery(false); Sort freeSearchInjectedSort = null; if (facetFiltersQuery != null && facetFiltersQuery.getQuery().clauses().size() > 0) { searchQuery.add(facetFiltersQuery.getQuery(), Occur.MUST); } if (openQuery != null) { QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl); if (queryAndSort.query != null) { searchQuery.add(queryAndSort.query, Occur.MUST); } freeSearchInjectedSort = queryAndSort.sort; } Set<String> fieldNames = new HashSet<String>(); fieldNames.add(FieldNames.UUID); FieldSelector fieldSelector = new SetBasedFieldSelector(fieldNames, new HashSet<String>()); int fetchTotal = hitsRequested.getOffset() + hitsRequested.getLimit(); Sort sort = null; if (freeSearchInjectedSort != null) { // we already have a sort from the xpath or sql free search. 
Use this one sort = freeSearchInjectedSort; } else if (hitsRequested.getOrderByList().size() > 0) { List<Path> orderPropertiesList = new ArrayList<Path>(); List<Boolean> ascSpecsList = new ArrayList<Boolean>(); for (OrderBy orderBy : hitsRequested.getOrderByList()) { try { Name orderByProp = NameFactoryImpl.getInstance().create(orderBy.getName()); boolean isAscending = !orderBy.isDescending(); orderPropertiesList.add(createPath(orderByProp)); ascSpecsList.add(isAscending); } catch (IllegalArgumentException e) { log.warn("Skip property '{}' because cannot create a Name for it: {}", orderBy.getName(), e.toString()); } } if (orderPropertiesList.size() > 0) { Path[] orderProperties = orderPropertiesList .toArray(new Path[orderPropertiesList.size()]); boolean[] ascSpecs = new boolean[ascSpecsList.size()]; int i = 0; for (Boolean b : ascSpecsList) { ascSpecs[i] = b; i++; } sort = new Sort(createSortFields(orderProperties, ascSpecs, new String[orderProperties.length])); } } boolean sortScoreAscending = false; // if the sort is on score descending, we can set it to null as this is the default and more efficient if (sort != null && sort.getSort().length == 1 && sort.getSort()[0].getType() == SortField.SCORE) { if (sort.getSort()[0].getReverse()) { sortScoreAscending = true; } else { // we can skip sort as it is on score descending sort = null; } } TopDocs tfDocs; org.apache.lucene.search.Query query = searchQuery; if (searchQuery.clauses().size() == 0) { // add a match all query // searchQuery.add(new MatchAllDocsQuery(), Occur.MUST); query = new MatchAllDocsQuery(); } if (sort == null) { // when sort == null, use this search without search as is more efficient Filter filterToApply = new DocIdSetFilter(matchingDocsSetBuilder.toBitSet()); tfDocs = searcher.search(query, filterToApply, fetchTotal); } else { if (sortScoreAscending) { // we need the entire searchQuery because scoring is involved Filter filterToApply = new DocIdSetFilter(matchingDocsSetBuilder.toBitSet()); 
tfDocs = searcher.search(query, filterToApply, fetchTotal, sort); } else { // because we have at least one explicit sort, scoring can be skipped. We can use cached bitsets combined with a match all query if (facetFiltersQuery != null) { matchingDocsSetBuilder.add( filterDocIdSetPlainLuceneQuery(facetFiltersQuery.getQuery(), indexReader)); } if (openQuery != null) { QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl); matchingDocsSetBuilder .add(filterDocIdSetJackRabbitQuery(queryAndSort.query, indexReader)); } Filter filterToApply = new DocIdSetFilter(matchingDocsSetBuilder.toBitSet()); // set query to MatchAllDocsQuery because we have everything as filter now query = new MatchAllDocsQuery(); tfDocs = searcher.search(query, filterToApply, fetchTotal, sort); } } ScoreDoc[] hits = tfDocs.scoreDocs; int position = hitsRequested.getOffset(); // LinkedHashSet because ordering should be kept! Set<NodeId> nodeIdHits = new LinkedHashSet<NodeId>(); while (position < hits.length) { Document d = indexReader.document(hits[position].doc, fieldSelector); Field uuidField = d.getField(FieldNames.UUID); if (uuidField != null) { nodeIdHits.add(NodeId.valueOf(uuidField.stringValue())); } position++; } return new ResultImpl(nodeIdHits.size(), nodeIdHits); } } } catch (IllegalNameException e) { log.error("Error during creating view: ", e); } catch (IOException e) { log.error("Error during creating view: ", e); } finally { if (indexReader != null) { try { // do not call indexReader.close() as ref counting is taken care of by // org.apache.jackrabbit.core.query.lucene.Util#closeOrRelease Util.closeOrRelease(indexReader); } catch (IOException e) { log.error("Exception while closing index reader", e); } } } return new ResultImpl(0, null); }
From source file:org.hippoecm.repository.FacetedNavigationEngineImpl.java
License:Apache License
public Result query(String statement, ContextImpl context) throws InvalidQueryException, RepositoryException { QueryRootNode root = org.apache.jackrabbit.spi.commons.query.QueryParser.parse(statement, "xpath", context.session, getQueryNodeFactory()); org.apache.lucene.search.Query query = LuceneQueryBuilder.createQuery(root, context.session, getContext().getItemStateManager(), getNamespaceMappings(), getTextAnalyzer(), getContext().getPropertyTypeRegistry(), getSynonymProvider(), getIndexFormatVersion(), null); Set<NodeId> nodeIdHits = new LinkedHashSet<NodeId>(); try {//from w w w .ja v a 2s . c o m IndexReader indexReader = getIndexReader(false); IndexSearcher searcher = new IndexSearcher(indexReader); TopDocs tfDocs = searcher.search(query, null, 1000); ScoreDoc[] hits = tfDocs.scoreDocs; int position = 0; Set<String> fieldNames = new HashSet<String>(); fieldNames.add(FieldNames.UUID); FieldSelector fieldSelector = new SetBasedFieldSelector(fieldNames, new HashSet<String>()); // LinkedHashSet because ordering should be kept! while (position < hits.length) { Document d = indexReader.document(hits[position].doc, fieldSelector); Field uuidField = d.getField(FieldNames.UUID); if (uuidField != null) { nodeIdHits.add(NodeId.valueOf(uuidField.stringValue())); } position++; } } catch (IOException ex) { log.warn(ex.getMessage(), ex); } return new ResultImpl(nodeIdHits.size(), nodeIdHits); }
From source file:org.jahia.services.search.jcr.HTMLExcerpt.java
License:Open Source License
@Override public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException { IndexReader reader = index.getIndexReader(); try {/* w w w.j a v a 2 s . c o m*/ Term idTerm = TermFactory.createUUIDTerm(id.toString()); TermDocs tDocs = reader.termDocs(idTerm); int docNumber; Document doc; try { if (tDocs.next()) { docNumber = tDocs.doc(); doc = reader.document(docNumber, FULLTEXT); } else { // node not found in index return null; } } finally { tDocs.close(); } Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT); if (fields.length == 0) { // Avoid to return all index entries as excerpt log.debug("Fulltext field not stored, using {}", JahiaExcerptProvider.class.getName()); JahiaExcerptProvider exProvider = new JahiaExcerptProvider(); exProvider.init(query, index); return exProvider.getExcerpt(id, maxFragments, maxFragmentSize); } else { final String excerpt = super.getExcerpt(id, maxFragments, maxFragmentSize); if (excerpt != null) { return APOS.matcher(excerpt).replaceAll("'"); } else return ""; } } finally { Util.closeOrRelease(reader); } }