Example usage for org.apache.lucene.index.IndexReader.document

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.document.

Prototype

public final Document document(int docID, Set<String> fieldsToLoad) throws IOException

Source Link

Document

Like #document(int) but only loads the specified fields.

Usage

From source file:org.apache.blur.manager.IndexManager.java

License:Apache License

/**
 * Reads the stored {@code ROW_ID} value of a single document.
 *
 * <p>Only the {@code ROW_ID} stored field is requested from the reader. The previous
 * implementation first ran a {@code StoredFieldVisitor} that answered {@code STOP} or
 * {@code NO} for every field (so it never loaded anything) and then loaded the complete
 * document just to read one field.
 *
 * @param reader the index reader the document is read from
 * @param docId  the number of the document to read
 * @return whatever {@code Document#get} yields for {@code ROW_ID} on that document
 * @throws CorruptIndexException propagated from the reader
 * @throws IOException           propagated from the reader
 */
private static String getRowId(IndexReader reader, int docId) throws CorruptIndexException, IOException {
    // Restrict stored-field loading to the single field that is actually needed.
    return reader.document(docId, java.util.Collections.singleton(ROW_ID)).get(ROW_ID);
}

From source file:org.apache.jackrabbit.core.JahiaSearchManager.java

License:Open Source License

/**
 * Queues additional nodes for indexing that depend on the nodes touched by the current change set.
 *
 * <p>Two kinds of dependencies are handled:
 * <ul>
 * <li>documents whose {@code JahiaNodeIndexer.FACET_HIERARCHY} field references a node reported by
 * {@code getReMovedOrRenamedHierarchicalNodes}; they are looked up in batches of at most
 * {@code BooleanQuery.getMaxClauseCount()} ids per query;</li>
 * <li>child nodes of every added node whose local name starts with
 * {@code TRANSLATION_LOCALNODENAME_PREFIX}, unless {@code areAllPropertiesCopyExcluded} reports
 * {@code true} for the property events.</li>
 * </ul>
 * Failures while registering an individual node are logged as warnings and do not abort the run.
 */
private void addJahiaDependencies(final Set<NodeId> removedIds, final Map<NodeId, EventImpl> addedStates,
        List<EventImpl> propEvents, final Set<NodeId> nodeEventRemovedIds)
        throws RepositoryException, IOException {

    final Set<NodeId> movedOrRenamed = getReMovedOrRenamedHierarchicalNodes(nodeEventRemovedIds);
    if (!movedOrRenamed.isEmpty()) {
        // Nodes referring to a moved/renamed node through a hierarchical faceting property
        // have to be re-indexed as well.
        final IndexReader reader = ((SearchIndex) getQueryHandler()).getIndexReader();
        final Searcher searcher = new IndexSearcher(reader);
        try {
            final List<NodeId> pending = new ArrayList<NodeId>(movedOrRenamed);
            int batchStart = 0;
            int batchEnd = Math.min(pending.size(), BooleanQuery.getMaxClauseCount());
            while (batchStart < pending.size()) {
                final long timer = System.currentTimeMillis();

                // One SHOULD clause per node id of the current batch.
                final BooleanQuery query = new BooleanQuery(true);
                for (final NodeId nodeId : pending.subList(batchStart, batchEnd)) {
                    final Term facetTerm = new Term(JahiaNodeIndexer.FACET_HIERARCHY, nodeId.toString());
                    query.add(new BooleanClause(new TermQuery(facetTerm), BooleanClause.Occur.SHOULD));
                }

                searcher.search(query, new AbstractHitCollector() {
                    public void collect(int doc, float score) {
                        try {
                            String uuid = reader.document(doc, FieldSelectors.UUID).get("_:UUID");
                            addIdToBeIndexed(new NodeId(uuid), removedIds, addedStates);
                        } catch (Exception e) {
                            log.warn(
                                    "Documents referencing moved/renamed hierarchy facet nodes may not be updated",
                                    e);
                        }
                    }
                });

                if (log.isDebugEnabled()) {
                    final long elapsed = System.currentTimeMillis() - timer;
                    log.debug("Facet hierarchy search in {} ms", new Object[] { elapsed });
                }

                // Advance the window to the next batch.
                batchStart += BooleanQuery.getMaxClauseCount();
                batchEnd = Math.min(pending.size(), batchEnd + BooleanQuery.getMaxClauseCount());
            }
        } finally {
            searcher.close();
            Util.closeOrRelease(reader);
        }
    }

    // Translation sub-nodes are indexed too, unless only properties changed that are excluded
    // from being copied down to translation nodes.
    if (addedStates.isEmpty() || areAllPropertiesCopyExcluded(propEvents)) {
        return;
    }
    // Iterate over a snapshot of the key set, as in the original implementation.
    for (final NodeId node : new HashSet<NodeId>(addedStates.keySet())) {
        if (!itemMgr.hasItemState(node)) {
            continue;
        }
        try {
            final NodeState state = (NodeState) itemMgr.getItemState(node);
            for (ChildNodeEntry child : state.getChildNodeEntries()) {
                final String localName = child.getName().getLocalName();
                if (!localName.startsWith(TRANSLATION_LOCALNODENAME_PREFIX)) {
                    continue;
                }
                try {
                    addIdToBeIndexed(child.getId(), removedIds, addedStates);
                } catch (ItemStateException e) {
                    log.warn("Index of translation node may not be updated", e);
                }
            }
        } catch (ItemStateException e) {
            log.warn("Index of translation node may not be updated", e);
        }
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}/*from w w  w . j  a  v  a  2  s.  com*/
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new HitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            ids.add(new NodeId(d.get(FieldNames.UUID)));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}/*from ww  w .j  a  va 2s  .com*/
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new HitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            UUID uuid = UUID.fromString(d.get(FieldNames.UUID));
            ids.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}

From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java

License:Apache License

/**
 * Fills the {@link #parents} <code>DocId</code> array.
 * <p>
 * A first pass reads the UUID and parent UUID of every non-deleted document. A second pass
 * stores, per document, either a <code>DocId</code> for the parent's document number (parent
 * found in this reader), a <code>DocId</code> for the parent UUID (parent not found in this
 * reader, counted as "foreign"), or <code>DocId.NULL</code> when there is no parent.
 *
 * @param reader the underlying index reader.
 * @throws IOException if an error occurs while reading from the index.
 */
private void initializeParents(IndexReader reader) throws IOException {
    long startMillis = System.currentTimeMillis();

    // Pass 1: uuid -> NodeInfo for every live document.
    Map infosByUuid = new HashMap();
    for (int docNumber = 0; docNumber < reader.maxDoc(); docNumber++) {
        if (reader.isDeleted(docNumber)) {
            continue;
        }
        Document stored = reader.document(docNumber, FieldSelectors.UUID_AND_PARENT);
        UUID nodeUuid = UUID.fromString(stored.get(FieldNames.UUID));
        UUID parentUuid;
        try {
            parentUuid = UUID.fromString(stored.get(FieldNames.PARENT));
        } catch (IllegalArgumentException e) {
            // root node does not have a parent
            parentUuid = null;
        }
        infosByUuid.put(nodeUuid, new NodeInfo(docNumber, nodeUuid, parentUuid));
    }

    // Pass 2: resolve each node's parent to a DocId.
    double foreignCount = 0;
    for (Iterator infos = infosByUuid.values().iterator(); infos.hasNext();) {
        NodeInfo node = (NodeInfo) infos.next();
        NodeInfo parentNode = (NodeInfo) infosByUuid.get(node.parent);
        if (parentNode != null) {
            // parent lives in this reader
            parents[node.docId] = DocId.create(parentNode.docId);
        } else if (node.parent == null) {
            // no parent -> root node
            parents[node.docId] = DocId.NULL;
        } else {
            // parent is known by UUID only
            foreignCount++;
            parents[node.docId] = DocId.create(node.parent);
        }
    }

    if (!log.isDebugEnabled()) {
        return;
    }
    NumberFormat percent = NumberFormat.getPercentInstance();
    percent.setMaximumFractionDigits(1);
    long elapsedMillis = System.currentTimeMillis() - startMillis;
    // Turn the absolute count into a ratio for the percent formatter.
    double foreignRatio = foreignCount;
    if (parents.length > 0) {
        foreignRatio /= parents.length;
    }
    log.debug("initialized {} DocIds in {} ms, {} foreign parents",
            new Object[] { new Integer(parents.length), new Long(elapsedMillis), percent.format(foreignRatio) });
}

From source file:org.apache.jackrabbit.core.query.lucene.JahiaLuceneQueryFactoryImpl.java

License:Open Source License

/**
 * Get a String array of indexed fields for running quick checks
 * [0] the uuid of the language independent node
 * [1] the acl-id/* w  ww  .j  a va  2  s .  c  om*/
 * [2] "1" if visibility rule is set for node
 * [3] "true" node is published / "false" node is not published
 */
private IndexedNodeInfo getIndexedNodeInfo(ScoreNode sn, IndexReader reader, final boolean onlyMainNodeUuid)
        throws IOException {
    IndexedNodeInfo info = new IndexedNodeInfo(sn.getDoc(reader));

    Document doc = reader.document(info.getDocNumber(),
            onlyMainNodeUuid ? ONLY_MAIN_NODE_UUID : OPTIMIZATION_FIELDS);

    if (doc.getField(JahiaNodeIndexer.TRANSLATED_NODE_PARENT) != null) {
        info.setMainNodeUuid(doc.getField(FieldNames.PARENT).stringValue());
    } else {
        info.setMainNodeUuid(sn.getNodeId().toString());
    }
    if (!onlyMainNodeUuid) {
        if (isAclUuidInIndex()) {
            Field aclUuidField = doc.getField(JahiaNodeIndexer.ACL_UUID);
            if (aclUuidField != null) {
                info.setAclUuid(aclUuidField.stringValue());
            }
        }
        Field checkVisibilityField = doc.getField(JahiaNodeIndexer.CHECK_VISIBILITY);
        if (checkVisibilityField != null) {
            info.setCheckVisibility(checkVisibilityField.stringValue());
        }
        Field publishedField = doc.getField(JahiaNodeIndexer.PUBLISHED);
        if (publishedField != null) {
            info.setPublished(publishedField.stringValue());
        }
        Field[] checkInvalidLanguagesField = doc.getFields(JahiaNodeIndexer.INVALID_LANGUAGES);
        if (checkInvalidLanguagesField != null && checkInvalidLanguagesField.length > 0) {
            for (Field field : checkInvalidLanguagesField) {
                info.addInvalidLanguages(field.stringValue());
            }
        }
    }
    return info;
}

From source file:org.apache.jackrabbit.core.query.lucene.JahiaSearchIndex.java

License:Open Source License

/**
 * {@inheritDoc}
 * <p>
 * Uses the reader obtained from {@code getIndexReader(false)}: runs a term query on
 * {@code FieldNames.WEAK_REFS} for the string form of {@code id}, remembers the matching
 * document numbers and afterwards turns the {@code FieldNames.UUID} value of each matching
 * document into a {@link NodeId}.
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> hitDocs = new ArrayList<Integer>();
    final List<NodeId> referrers = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader(false);
    try {
        // Phase 1: collect the numbers of all matching documents.
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            searcher.search(new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString())),
                    new AbstractHitCollector() {
                        public void collect(int doc, float score) {
                            hitDocs.add(doc);
                        }
                    });
        } finally {
            searcher.close();
        }
        // Phase 2: resolve every hit to the node id stored in its UUID field.
        for (int docNumber : hitDocs) {
            String uuid = reader.document(docNumber, FieldSelectors.UUID).get(FieldNames.UUID);
            referrers.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return referrers;
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}
 * <p>
 * Runs a term query on {@code FieldNames.WEAK_REFS} for the string form of {@code id},
 * remembers the matching document numbers and afterwards turns the
 * {@code FieldNames.UUID} value of each matching document into a {@link NodeId}.
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> hitDocs = new ArrayList<Integer>();
    final List<NodeId> referrers = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        // Phase 1: collect the numbers of all matching documents.
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            searcher.search(new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString())),
                    new AbstractHitCollector() {
                        @Override
                        public void collect(int doc, float score) {
                            hitDocs.add(doc);
                        }
                    });
        } finally {
            searcher.close();
        }
        // Phase 2: resolve every hit to the node id stored in its UUID field.
        for (int docNumber : hitDocs) {
            String uuid = reader.document(docNumber, FieldSelectors.UUID).get(FieldNames.UUID);
            referrers.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return referrers;
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * Returns the index documents whose {@code FieldNames.UUID} term equals the string form of
 * the given id. Each document is loaded with the {@code FieldSelectors.UUID_AND_PARENT}
 * selector while the searcher is still open.
 *
 * @param id the node id to look up
 * @return the matching documents, in the order the collector reported them
 */
List<Document> getNodeDocuments(NodeId id) throws RepositoryException, IOException {
    final List<Integer> hitDocs = new ArrayList<Integer>(1);
    final List<Document> result = new ArrayList<Document>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            // Collect the matching document numbers first ...
            searcher.search(new TermQuery(new Term(FieldNames.UUID, id.toString())),
                    new AbstractHitCollector() {
                        @Override
                        protected void collect(final int doc, final float score) {
                            hitDocs.add(doc);
                        }
                    });
            // ... then load the stored fields of each hit.
            for (int docNumber : hitDocs) {
                result.add(reader.document(docNumber, FieldSelectors.UUID_AND_PARENT));
            }
        } finally {
            searcher.close();
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return result;
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Executes the given index plan and returns a cursor over the resulting rows.
 *
 * <p>If the full-text constraint of the plan's filter refers to more than one relative path,
 * the work is delegated to a {@code MultiLuceneIndex}. Otherwise a lazily evaluated iterator is
 * created that loads results in batches (starting with {@code LUCENE_QUERY_BATCH_SIZE} and
 * doubling up to 100000 per batch), together with a size estimator that counts the total hits
 * of the query.
 *
 * <p>Besides plain Lucene queries, the iterator handles spellcheck and suggest requests; for
 * those a single row holding the suggestions accessible through the filter is produced.
 * An {@code IOException} during loading is logged as a warning and ends the iteration with
 * whatever has been queued so far.
 *
 * @param plan      the index plan to execute
 * @param rootState the root node state, passed on to {@code MultiLuceneIndex} when needed
 * @return a cursor over the query result
 */
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }

    final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        // rows loaded from the index but not yet handed out
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        // ancestor paths already returned (only used when a relative path is involved)
        private final Set<String> seenPaths = Sets.newHashSet();
        // last document of the previous batch; used as the offset for searchAfter
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        // set once a spellcheck/suggest request was answered; nothing more to load afterwards
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            // Hand out a queued row, loading the next batch first when the queue ran dry.
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        /**
         * Converts a hit into a result row.
         *
         * @return the row, or {@code null} if the document has no path or its ancestor path
         *         was already returned
         */
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt)
                throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given
                    // relative path
                    // if (!path.endsWith("/" + parent)) {
                    // continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        return null;
                    }
                    seenPaths.add(path);
                }

                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        // Take the start time per batch: this loop may run several times when a
                        // batch yields no rows, and each round is reported separately below.
                        long start = System.currentTimeMillis();
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        long time = System.currentTimeMillis() - start;
                        LOG.debug("... took {} ms", time);
                        // double the batch size for the next round, capped at 100000
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        // All hits of this batch were filtered out but there may be more:
                        // continue right after the last hit seen.
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            // keep the suggestion as soon as one accessible document contains it
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }

                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT,
                            indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            // keep the suggestion as soon as one accessible document contains it
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(
                                            "{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }

                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        /**
         * Drops the paging offset when the version reported for the searcher differs from the
         * one seen on the previous call, so that the next search starts from the beginning.
         */
        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                // NOTE(review): the arguments read "current => last"; the message suggests
                // "old => new" may have been intended - confirm before swapping.
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        /**
         * Counts the total hits of the query; returns -1 when the request is not a plain
         * query or the search failed with an IOException.
         */
        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator);
}