List of usage examples for org.apache.lucene.index.IndexReader.document
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException
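Before the project examples below, here is a minimal standalone sketch of this overload (not taken from any of the listed projects); the "title" field name is an assumption and the reader is any already opened IndexReader:

import java.io.IOException;
import java.util.Collections;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

// Hypothetical helper: decode only the "title" stored field for the given docID
// instead of loading every stored field of the document.
static String loadTitle(IndexReader reader, int docID) throws IOException {
    Document doc = reader.document(docID, Collections.singleton("title"));
    return doc.get("title"); // null if the field is absent or was not stored
}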
From source file:org.apache.blur.manager.IndexManager.java
License:Apache License
private static String getRowId(IndexReader reader, int docId) throws CorruptIndexException, IOException {
    reader.document(docId, new StoredFieldVisitor() {
        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            if (ROW_ID.equals(fieldInfo.name)) {
                return StoredFieldVisitor.Status.STOP;
            }
            return StoredFieldVisitor.Status.NO;
        }
    });
    return reader.document(docId).get(ROW_ID);
}
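The visitor overload document(int, StoredFieldVisitor) used above can also capture the field value directly. A sketch using Lucene's built-in DocumentStoredFieldVisitor (the rowIdFieldName parameter stands in for Blur's ROW_ID constant):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.IndexReader;

// Sketch only: a DocumentStoredFieldVisitor restricted to a single field lets the
// reader skip the remaining stored fields while still returning the value.
static String getRowIdWithVisitor(IndexReader reader, int docId, String rowIdFieldName) throws IOException {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(rowIdFieldName);
    reader.document(docId, visitor);          // visitor accepts only rowIdFieldName
    Document partial = visitor.getDocument(); // contains at most that one field
    return partial.get(rowIdFieldName);
}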
From source file:org.apache.jackrabbit.core.JahiaSearchManager.java
License:Open Source License
private void addJahiaDependencies(final Set<NodeId> removedIds, final Map<NodeId, EventImpl> addedStates,
        List<EventImpl> propEvents, final Set<NodeId> nodeEventRemovedIds)
        throws RepositoryException, IOException {
    Set<NodeId> hierarchyNodeIds = getReMovedOrRenamedHierarchicalNodes(nodeEventRemovedIds);
    if (!hierarchyNodeIds.isEmpty()) {
        // if a node which is referenced with a hierarchical faceting property is moved/renamed,
        // we need to re-index the nodes referring to it
        final IndexReader reader = ((SearchIndex) getQueryHandler()).getIndexReader();
        final Searcher searcher = new IndexSearcher(reader);
        try {
            int removeSubListStart = 0;
            List<NodeId> removeList = new ArrayList<NodeId>(hierarchyNodeIds);
            int removeSubListEnd = Math.min(removeList.size(), BooleanQuery.getMaxClauseCount());
            while (removeSubListStart < removeList.size()) {
                long timer = System.currentTimeMillis();
                BooleanQuery query = new BooleanQuery(true);
                for (final NodeId nodeId : new ArrayList<NodeId>(
                        removeList.subList(removeSubListStart, removeSubListEnd))) {
                    TermQuery termQuery = new TermQuery(
                            new Term(JahiaNodeIndexer.FACET_HIERARCHY, nodeId.toString()));
                    query.add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD));
                }
                searcher.search(query, new AbstractHitCollector() {
                    public void collect(int doc, float score) {
                        try {
                            String uuid = reader.document(doc, FieldSelectors.UUID).get("_:UUID");
                            addIdToBeIndexed(new NodeId(uuid), removedIds, addedStates);
                        } catch (Exception e) {
                            log.warn("Documents referencing moved/renamed hierarchy facet nodes may not be updated", e);
                        }
                    }
                });
                if (log.isDebugEnabled()) {
                    log.debug("Facet hierarchy search in {} ms",
                            new Object[] { (System.currentTimeMillis() - timer) });
                }
                removeSubListStart += BooleanQuery.getMaxClauseCount();
                removeSubListEnd = Math.min(removeList.size(),
                        removeSubListEnd + BooleanQuery.getMaxClauseCount());
            }
        } finally {
            searcher.close();
            Util.closeOrRelease(reader);
        }
    }
    // index also translation subnodes, unless only properties are changed, which are
    // excluded from copying down to translation nodes
    if (!addedStates.isEmpty() && !areAllPropertiesCopyExcluded(propEvents)) {
        for (final NodeId node : new HashSet<NodeId>(addedStates.keySet())) {
            if (itemMgr.hasItemState(node)) {
                try {
                    for (ChildNodeEntry childNodeEntry : ((NodeState) itemMgr.getItemState(node))
                            .getChildNodeEntries()) {
                        if (childNodeEntry.getName().getLocalName()
                                .startsWith(TRANSLATION_LOCALNODENAME_PREFIX)) {
                            try {
                                addIdToBeIndexed(childNodeEntry.getId(), removedIds, addedStates);
                            } catch (ItemStateException e) {
                                log.warn("Index of translation node may not be updated", e);
                            }
                        }
                    }
                } catch (ItemStateException e) {
                    log.warn("Index of translation node may not be updated", e);
                }
            }
        }
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java
License:Apache License
/**
 * {@inheritDoc}
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new HitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            ids.add(new NodeId(d.get(FieldNames.UUID)));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}
From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java
License:Apache License
/**
 * {@inheritDoc}
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new HitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            UUID uuid = UUID.fromString(d.get(FieldNames.UUID));
            ids.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}
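FieldSelectors.UUID in the two examples above is a Jackrabbit-internal constant. On the pre-4.0 Lucene API that Jackrabbit core targets, an equivalent single-field selector can be written directly against org.apache.lucene.document.FieldSelector; a sketch with a hypothetical "uuid" field name:

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.index.IndexReader;

// Sketch for the Lucene 2.x/3.x API: a FieldSelector that loads only the "uuid"
// stored field, mirroring what FieldSelectors.UUID does in Jackrabbit.
static String loadUuid(IndexReader reader, int docId) throws IOException {
    FieldSelector uuidOnly = new FieldSelector() {
        public FieldSelectorResult accept(String fieldName) {
            return "uuid".equals(fieldName) ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
        }
    };
    return reader.document(docId, uuidOnly).get("uuid");
}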
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Initializes the {@link #parents} <code>DocId</code> array.
 *
 * @param reader the underlying index reader.
 * @throws IOException if an error occurs while reading from the index.
 */
private void initializeParents(IndexReader reader) throws IOException {
    long time = System.currentTimeMillis();
    Map docs = new HashMap();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i, FieldSelectors.UUID_AND_PARENT);
            UUID uuid = UUID.fromString(doc.get(FieldNames.UUID));
            UUID parent = null;
            try {
                parent = UUID.fromString(doc.get(FieldNames.PARENT));
            } catch (IllegalArgumentException e) {
                // root node does not have a parent
            }
            NodeInfo info = new NodeInfo(i, uuid, parent);
            docs.put(uuid, info);
        }
    }
    double foreignParents = 0;
    Iterator it = docs.values().iterator();
    while (it.hasNext()) {
        NodeInfo info = (NodeInfo) it.next();
        NodeInfo parent = (NodeInfo) docs.get(info.parent);
        if (parent != null) {
            parents[info.docId] = DocId.create(parent.docId);
        } else if (info.parent != null) {
            foreignParents++;
            parents[info.docId] = DocId.create(info.parent);
        } else {
            // no parent -> root node
            parents[info.docId] = DocId.NULL;
        }
    }
    if (log.isDebugEnabled()) {
        NumberFormat nf = NumberFormat.getPercentInstance();
        nf.setMaximumFractionDigits(1);
        time = System.currentTimeMillis() - time;
        if (parents.length > 0) {
            foreignParents /= parents.length;
        }
        log.debug("initialized {} DocIds in {} ms, {} foreign parents",
                new Object[] { new Integer(parents.length), new Long(time), nf.format(foreignParents) });
    }
}
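reader.isDeleted(int) in the example above exists only on the pre-4.0 IndexReader. On Lucene 4.x the same full-index scan would consult the live-docs bitset instead; a sketch, again with a hypothetical "uuid" field name:

import java.io.IOException;
import java.util.Collections;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

// Sketch for Lucene 4.x: skip deleted documents via the live-docs bitset while
// loading only the "uuid" stored field for each remaining document.
static void scanUuids(IndexReader reader) throws IOException {
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue; // document i has been deleted
        }
        Document doc = reader.document(i, Collections.singleton("uuid"));
        String uuid = doc.get("uuid");
        // ... build whatever per-document structure is needed
    }
}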
From source file:org.apache.jackrabbit.core.query.lucene.JahiaLuceneQueryFactoryImpl.java
License:Open Source License
/**
 * Get a String array of indexed fields for running quick checks
 * [0] the uuid of the language independent node
 * [1] the acl-id
 * [2] "1" if visibility rule is set for node
 * [3] "true" node is published / "false" node is not published
 */
private IndexedNodeInfo getIndexedNodeInfo(ScoreNode sn, IndexReader reader, final boolean onlyMainNodeUuid)
        throws IOException {
    IndexedNodeInfo info = new IndexedNodeInfo(sn.getDoc(reader));
    Document doc = reader.document(info.getDocNumber(),
            onlyMainNodeUuid ? ONLY_MAIN_NODE_UUID : OPTIMIZATION_FIELDS);
    if (doc.getField(JahiaNodeIndexer.TRANSLATED_NODE_PARENT) != null) {
        info.setMainNodeUuid(doc.getField(FieldNames.PARENT).stringValue());
    } else {
        info.setMainNodeUuid(sn.getNodeId().toString());
    }
    if (!onlyMainNodeUuid) {
        if (isAclUuidInIndex()) {
            Field aclUuidField = doc.getField(JahiaNodeIndexer.ACL_UUID);
            if (aclUuidField != null) {
                info.setAclUuid(aclUuidField.stringValue());
            }
        }
        Field checkVisibilityField = doc.getField(JahiaNodeIndexer.CHECK_VISIBILITY);
        if (checkVisibilityField != null) {
            info.setCheckVisibility(checkVisibilityField.stringValue());
        }
        Field publishedField = doc.getField(JahiaNodeIndexer.PUBLISHED);
        if (publishedField != null) {
            info.setPublished(publishedField.stringValue());
        }
        Field[] checkInvalidLanguagesField = doc.getFields(JahiaNodeIndexer.INVALID_LANGUAGES);
        if (checkInvalidLanguagesField != null && checkInvalidLanguagesField.length > 0) {
            for (Field field : checkInvalidLanguagesField) {
                info.addInvalidLanguages(field.stringValue());
            }
        }
    }
    return info;
}
From source file:org.apache.jackrabbit.core.query.lucene.JahiaSearchIndex.java
License:Open Source License
/**
 * {@inheritDoc}
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader(false);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new AbstractHitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            ids.add(new NodeId(d.get(FieldNames.UUID)));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}
From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java
License:Apache License
/**
 * {@inheritDoc}
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new AbstractHitCollector() {
                @Override
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            ids.add(new NodeId(d.get(FieldNames.UUID)));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}
From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java
License:Apache License
List<Document> getNodeDocuments(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docIds = new ArrayList<Integer>(1);
    final List<Document> docs = new ArrayList<Document>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.UUID, id.toString()));
            searcher.search(q, new AbstractHitCollector() {
                @Override
                protected void collect(final int doc, final float score) {
                    docIds.add(doc);
                }
            });
            for (Integer docId : docIds) {
                docs.add(reader.document(docId, FieldSelectors.UUID_AND_PARENT));
            }
        } finally {
            searcher.close();
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return docs;
}
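The Jackrabbit examples above share one pattern: collect matching doc IDs with a collector, then load only the needed stored fields per hit. A sketch of that pattern against the plain Lucene 4.x API (no Jackrabbit classes; the "uuid" field name is an assumption):

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;

// Sketch of the collect-then-load pattern on Lucene 4.x: gather global doc IDs
// first, then call document(docID, fieldsToLoad) so only "uuid" is decoded.
static List<String> collectUuids(IndexReader reader, Query query) throws IOException {
    final List<Integer> docIds = new ArrayList<Integer>();
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.search(query, new Collector() {
        private int docBase;
        @Override public void setScorer(Scorer scorer) { /* scores not needed */ }
        @Override public void collect(int doc) { docIds.add(docBase + doc); }
        @Override public void setNextReader(AtomicReaderContext context) { docBase = context.docBase; }
        @Override public boolean acceptsDocsOutOfOrder() { return true; }
    });
    List<String> uuids = new ArrayList<String>();
    for (int docId : docIds) {
        Document d = reader.document(docId, Collections.singleton("uuid"));
        uuids.add(d.get("uuid"));
    }
    return uuids;
}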
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java
License:Apache License
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }
    final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt)
                throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given relative path
                    // if (!path.endsWith("/" + parent)) {
                    //     continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        return null;
                    }
                    seenPaths.add(path);
                }
                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    long time = System.currentTimeMillis();
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        time = System.currentTimeMillis() - time;
                        LOG.debug("... took {} ms", time);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery =
                            (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery =
                            (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults =
                            SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);

                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT,
                            indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(
                                            "{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }
                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator);
}