Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

This page collects usage examples for the document method of org.apache.lucene.index.IndexReader.

Prototype

public final Document document(int docID, Set<String> fieldsToLoad) throws IOException 

Source Link

Document

Like document(int), but loads only the specified fields.

Usage

From source file:org.apache.blur.manager.IndexManager.java

License:Apache License

/**
 * Reads the stored {@code ROW_ID} value of a single document.
 *
 * <p>Only the {@code ROW_ID} stored field is requested from the reader, so the other
 * stored fields of the document are not loaded.
 *
 * @param reader the index reader to read the document from
 * @param docId  the document number within {@code reader}
 * @return the string value of the first {@code ROW_ID} field of the document, or
 *         {@code null} if the document has no such field
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           if the stored fields cannot be read
 */
private static String getRowId(IndexReader reader, int docId) throws CorruptIndexException, IOException {
    // The previous version first ran a StoredFieldVisitor whose outcome was thrown away and
    // then loaded the complete document. Restricting the load to ROW_ID reads the value once.
    return reader.document(docId, java.util.Collections.singleton(ROW_ID)).get(ROW_ID);
}

From source file:org.apache.jackrabbit.core.JahiaSearchManager.java

License:Open Source License

/**
 * Schedules nodes for re-indexing that depend on the nodes touched by the current change set.
 *
 * <p>Two kinds of dependencies are handled:
 * <ol>
 * <li>Documents whose hierarchical facet field references a node returned by
 * {@code getReMovedOrRenamedHierarchicalNodes}. They are looked up in the index in batches of at
 * most {@link BooleanQuery#getMaxClauseCount()} ids, and every hit is handed to
 * {@code addIdToBeIndexed}.</li>
 * <li>Child nodes of every added node whose local name starts with
 * {@code TRANSLATION_LOCALNODENAME_PREFIX}, unless {@code areAllPropertiesCopyExcluded} reports
 * that all changed properties are excluded from copying.</li>
 * </ol>
 * A failure while handling a single hit or a single child node is logged as a warning and does not
 * stop the processing of the remaining ones.
 *
 * @param removedIds          passed through to {@code addIdToBeIndexed}
 * @param addedStates         passed through to {@code addIdToBeIndexed}; its keys are the nodes whose
 *                            translation children are inspected
 * @param propEvents          the property events checked by {@code areAllPropertiesCopyExcluded}
 * @param nodeEventRemovedIds the ids used to determine the moved or renamed hierarchical nodes
 * @throws RepositoryException declared for the repository lookups performed by the called helpers
 * @throws IOException         if searching the index fails
 */
private void addJahiaDependencies(final Set<NodeId> removedIds, final Map<NodeId, EventImpl> addedStates,
        List<EventImpl> propEvents, final Set<NodeId> nodeEventRemovedIds)
        throws RepositoryException, IOException {

    final Set<NodeId> movedOrRenamed = getReMovedOrRenamedHierarchicalNodes(nodeEventRemovedIds);
    if (!movedOrRenamed.isEmpty()) {
        // A node referenced through a hierarchical faceting property was moved or renamed:
        // every document referring to it has to be indexed again.
        final IndexReader reader = ((SearchIndex) getQueryHandler()).getIndexReader();
        final Searcher searcher = new IndexSearcher(reader);
        try {
            int from = 0;
            final List<NodeId> pending = new ArrayList<NodeId>(movedOrRenamed);
            int to = Math.min(pending.size(), BooleanQuery.getMaxClauseCount());
            while (from < pending.size()) {
                final long batchStart = System.currentTimeMillis();

                // One SHOULD clause per id; the batch size keeps the query below the clause limit.
                final BooleanQuery batchQuery = new BooleanQuery(true);
                for (final NodeId movedId : pending.subList(from, to)) {
                    final Term facetTerm = new Term(JahiaNodeIndexer.FACET_HIERARCHY, movedId.toString());
                    batchQuery.add(new BooleanClause(new TermQuery(facetTerm), BooleanClause.Occur.SHOULD));
                }

                searcher.search(batchQuery, new AbstractHitCollector() {
                    public void collect(int doc, float score) {
                        try {
                            final Document hit = reader.document(doc, FieldSelectors.UUID);
                            addIdToBeIndexed(new NodeId(hit.get("_:UUID")), removedIds, addedStates);
                        } catch (Exception e) {
                            log.warn(
                                    "Documents referencing moved/renamed hierarchy facet nodes may not be updated",
                                    e);
                        }
                    }
                });

                if (log.isDebugEnabled()) {
                    log.debug("Facet hierarchy search in {} ms",
                            new Object[] { (System.currentTimeMillis() - batchStart) });
                }

                from += BooleanQuery.getMaxClauseCount();
                to = Math.min(pending.size(), to + BooleanQuery.getMaxClauseCount());
            }
        } finally {
            searcher.close();
            Util.closeOrRelease(reader);
        }
    }

    // Translation sub-nodes are indexed as well, except when only properties changed that are
    // excluded from being copied down to the translation nodes.
    if (addedStates.isEmpty() || areAllPropertiesCopyExcluded(propEvents)) {
        return;
    }

    // Iterate over a snapshot of the keys: addIdToBeIndexed receives addedStates itself.
    for (final NodeId addedId : new HashSet<NodeId>(addedStates.keySet())) {
        if (!itemMgr.hasItemState(addedId)) {
            continue;
        }
        try {
            final NodeState addedNode = (NodeState) itemMgr.getItemState(addedId);
            for (ChildNodeEntry child : addedNode.getChildNodeEntries()) {
                final String childName = child.getName().getLocalName();
                if (!childName.startsWith(TRANSLATION_LOCALNODENAME_PREFIX)) {
                    continue;
                }
                try {
                    addIdToBeIndexed(child.getId(), removedIds, addedStates);
                } catch (ItemStateException e) {
                    log.warn("Index of translation node may not be updated", e);
                }
            }
        } catch (ItemStateException e) {
            log.warn("Index of translation node may not be updated", e);
        }
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}/*from w w  w . j  a  v  a  2  s.  com*/
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new HitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            ids.add(new NodeId(d.get(FieldNames.UUID)));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}/*from ww  w .j  a  va 2s  .com*/
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docs = new ArrayList<Integer>();
    final List<NodeId> ids = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            Query q = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            searcher.search(q, new HitCollector() {
                public void collect(int doc, float score) {
                    docs.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer doc : docs) {
            Document d = reader.document(doc, FieldSelectors.UUID);
            UUID uuid = UUID.fromString(d.get(FieldNames.UUID));
            ids.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return ids;
}

From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java

License:Apache License

/**
 * Fills the {@link #parents} <code>DocId</code> array for all documents of
 * <code>reader</code> that are not deleted.
 * <p>
 * A first pass reads the UUID and the parent UUID of every live document. A
 * second pass resolves each parent UUID: to a document number when the
 * parent was seen in this reader, to a UUID based <code>DocId</code> when it
 * was not (a "foreign" parent), and to <code>DocId.NULL</code> when the
 * document has no parent.
 *
 * @param reader the underlying index reader.
 * @throws IOException if an error occurs while reading from the index.
 */
private void initializeParents(IndexReader reader) throws IOException {
    final long start = System.currentTimeMillis();

    // pass 1: collect uuid / parent uuid for every live document
    final Map infoByUuid = new HashMap();
    for (int docNum = 0; docNum < reader.maxDoc(); docNum++) {
        if (reader.isDeleted(docNum)) {
            continue;
        }
        Document stored = reader.document(docNum, FieldSelectors.UUID_AND_PARENT);
        UUID nodeUuid = UUID.fromString(stored.get(FieldNames.UUID));
        UUID parentUuid;
        try {
            parentUuid = UUID.fromString(stored.get(FieldNames.PARENT));
        } catch (IllegalArgumentException e) {
            // root node does not have a parent
            parentUuid = null;
        }
        infoByUuid.put(nodeUuid, new NodeInfo(docNum, nodeUuid, parentUuid));
    }

    // pass 2: translate parent uuids into DocIds
    double foreignParents = 0;
    for (Iterator infos = infoByUuid.values().iterator(); infos.hasNext();) {
        NodeInfo child = (NodeInfo) infos.next();
        NodeInfo localParent = (NodeInfo) infoByUuid.get(child.parent);
        if (localParent != null) {
            // parent lives in this reader
            parents[child.docId] = DocId.create(localParent.docId);
        } else if (child.parent == null) {
            // no parent -> root node
            parents[child.docId] = DocId.NULL;
        } else {
            // parent is known by uuid only
            foreignParents++;
            parents[child.docId] = DocId.create(child.parent);
        }
    }

    if (log.isDebugEnabled()) {
        NumberFormat percent = NumberFormat.getPercentInstance();
        percent.setMaximumFractionDigits(1);
        long elapsed = System.currentTimeMillis() - start;
        // turn the absolute count into a ratio for the percent formatter
        if (parents.length > 0) {
            foreignParents /= parents.length;
        }
        log.debug("initialized {} DocIds in {} ms, {} foreign parents",
                new Object[] { new Integer(parents.length), new Long(elapsed), percent.format(foreignParents) });
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.JahiaLuceneQueryFactoryImpl.java

License:Open Source License

/**
 * Get a String array of indexed fields for running quick checks
 * [0] the uuid of the language independent node
 * [1] the acl-id/* w  ww  .j  a va  2  s .  c  om*/
 * [2] "1" if visibility rule is set for node
 * [3] "true" node is published / "false" node is not published
 */
private IndexedNodeInfo getIndexedNodeInfo(ScoreNode sn, IndexReader reader, final boolean onlyMainNodeUuid)
        throws IOException {
    IndexedNodeInfo info = new IndexedNodeInfo(sn.getDoc(reader));

    Document doc = reader.document(info.getDocNumber(),
            onlyMainNodeUuid ? ONLY_MAIN_NODE_UUID : OPTIMIZATION_FIELDS);

    if (doc.getField(JahiaNodeIndexer.TRANSLATED_NODE_PARENT) != null) {
        info.setMainNodeUuid(doc.getField(FieldNames.PARENT).stringValue());
    } else {
        info.setMainNodeUuid(sn.getNodeId().toString());
    }
    if (!onlyMainNodeUuid) {
        if (isAclUuidInIndex()) {
            Field aclUuidField = doc.getField(JahiaNodeIndexer.ACL_UUID);
            if (aclUuidField != null) {
                info.setAclUuid(aclUuidField.stringValue());
            }
        }
        Field checkVisibilityField = doc.getField(JahiaNodeIndexer.CHECK_VISIBILITY);
        if (checkVisibilityField != null) {
            info.setCheckVisibility(checkVisibilityField.stringValue());
        }
        Field publishedField = doc.getField(JahiaNodeIndexer.PUBLISHED);
        if (publishedField != null) {
            info.setPublished(publishedField.stringValue());
        }
        Field[] checkInvalidLanguagesField = doc.getFields(JahiaNodeIndexer.INVALID_LANGUAGES);
        if (checkInvalidLanguagesField != null && checkInvalidLanguagesField.length > 0) {
            for (Field field : checkInvalidLanguagesField) {
                info.addInvalidLanguages(field.stringValue());
            }
        }
    }
    return info;
}

From source file:org.apache.jackrabbit.core.query.lucene.JahiaSearchIndex.java

License:Open Source License

/**
 * {@inheritDoc}
 * <p>
 * Looks up all documents whose {@link FieldNames#WEAK_REFS} field contains the string form of
 * <code>id</code>, using the reader obtained from <code>getIndexReader(false)</code>, and returns
 * the node ids stored in their {@link FieldNames#UUID} field.
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> hits = new ArrayList<Integer>();
    final List<NodeId> referringIds = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader(false);
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        try {
            final Term target = new Term(FieldNames.WEAK_REFS, id.toString());
            // remember the document numbers only, the documents are loaded below
            searcher.search(new TermQuery(target), new AbstractHitCollector() {
                public void collect(int doc, float score) {
                    hits.add(doc);
                }
            });
        } finally {
            searcher.close();
        }
        for (Integer hit : hits) {
            final String uuid = reader.document(hit, FieldSelectors.UUID).get(FieldNames.UUID);
            referringIds.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return referringIds;
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}
 * <p>
 * Runs a term query on {@link FieldNames#WEAK_REFS} for the string form of <code>id</code> and
 * converts the {@link FieldNames#UUID} value of every matching document into a {@link NodeId}.
 */
public Iterable<NodeId> getWeaklyReferringNodes(NodeId id) throws RepositoryException, IOException {
    final List<Integer> docNumbers = new ArrayList<Integer>();
    final List<NodeId> nodeIds = new ArrayList<NodeId>();
    final IndexReader reader = getIndexReader();
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        try {
            final Query weakRefs = new TermQuery(new Term(FieldNames.WEAK_REFS, id.toString()));
            final AbstractHitCollector collector = new AbstractHitCollector() {
                @Override
                public void collect(int doc, float score) {
                    docNumbers.add(doc);
                }
            };
            searcher.search(weakRefs, collector);
        } finally {
            searcher.close();
        }
        // the reader is still open here, only the searcher has been closed
        for (Integer docNumber : docNumbers) {
            final Document uuidOnly = reader.document(docNumber, FieldSelectors.UUID);
            final String uuid = uuidOnly.get(FieldNames.UUID);
            nodeIds.add(new NodeId(uuid));
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return nodeIds;
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * Returns the index documents whose {@link FieldNames#UUID} field equals the string form of
 * <code>id</code>. Each document is loaded with the {@link FieldSelectors#UUID_AND_PARENT}
 * selector.
 *
 * @param id the id of the node to look up.
 * @return the matching documents; empty if the index has no document for <code>id</code>.
 * @throws RepositoryException declared for obtaining the index reader.
 * @throws IOException if an error occurs while reading from the index.
 */
List<Document> getNodeDocuments(NodeId id) throws RepositoryException, IOException {
    final List<Integer> hits = new ArrayList<Integer>(1);
    final List<Document> result = new ArrayList<Document>();
    final IndexReader reader = getIndexReader();
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        try {
            final Term uuidTerm = new Term(FieldNames.UUID, id.toString());
            searcher.search(new TermQuery(uuidTerm), new AbstractHitCollector() {
                @Override
                protected void collect(final int doc, final float score) {
                    hits.add(doc);
                }
            });
            // load the documents while the searcher is still open
            for (int i = 0; i < hits.size(); i++) {
                final Document stored = reader.document(hits.get(i), FieldSelectors.UUID_AND_PARENT);
                result.add(stored);
            }
        } finally {
            searcher.close();
        }
    } finally {
        Util.closeOrRelease(reader);
    }
    return result;
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Executes the given index plan against the Lucene index and returns a cursor over the results.
 *
 * <p>If the full-text constraint of the filter refers to more than one relative path, the query
 * is delegated to a {@code MultiLuceneIndex}. Otherwise the returned cursor is backed by an
 * iterator that loads matching documents lazily in batches: the first batch has
 * {@code LUCENE_QUERY_BATCH_SIZE} entries and every following batch is twice as large, capped at
 * 100000. Spellcheck and suggest requests produce a single row holding the suggestions for which
 * at least one matching document is accessible according to the filter.
 *
 * <p>An {@link IOException} raised while loading a batch or estimating the size is logged as a
 * warning and not propagated; the iteration then ends with whatever rows are already queued.
 *
 * @param plan      the plan to execute; its {@code ATTR_INDEX_PATH} attribute names the index node
 * @param rootState the root node state, only used for the {@code MultiLuceneIndex} delegation
 * @return a cursor over the matching paths, together with a size estimator that reports the
 *         total hit count, or -1 when the request is not a plain {@link Query} or the search fails
 */
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    final Set<String> relPaths = getRelativePaths(ft);
    if (relPaths.size() > 1) {
        return new MultiLuceneIndex(filter, rootState, relPaths).query();
    }

    final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
    // we only restrict non-full-text conditions if there is
    // no relative property in the full-text constraint
    final boolean nonFullTextConstraints = parent.isEmpty();
    final int parentDepth = getDepth(parent);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        // last document of the previous batch; the next search continues after it
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        // set once a spellcheck/suggest request has produced its single row
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            // serve from the queue first and only hit the index when it has run dry
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        /**
         * Converts a hit into a result row.
         *
         * @return the row, or null if the document has no path or its (ancestor) path
         *         has already been returned
         */
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt)
                throws IOException {
            IndexReader reader = searcher.getIndexReader();
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (!parent.isEmpty()) {
                    // TODO OAK-828 this breaks node aggregation
                    // get the base path
                    // ensure the path ends with the given
                    // relative path
                    // if (!path.endsWith("/" + parent)) {
                    // continue;
                    // }
                    path = getAncestorPath(path, parentDepth);
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        return null;
                    }
                    seenPaths.add(path);
                }

                return new LuceneResultRow(path, doc.score, excerpt);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TopDocs docs;
                    checkForIndexVersionChange(searcher);
                    while (true) {
                        // Take the start time per batch. A single timestamp taken before the
                        // loop and overwritten with the elapsed value made the "took" message
                        // of every batch after the first one meaningless.
                        long start = System.currentTimeMillis();
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            docs = searcher.search(query, nextBatchSize);
                        }
                        long elapsed = System.currentTimeMillis() - start;
                        LOG.debug("... took {} ms", elapsed);
                        // double the batch size for the next round, computed in long to avoid overflow
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(analyzer, searcher, doc);
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        // The batch had hits but none produced a row (no path or duplicate):
                        // continue with the next batch instead of reporting "no more data".
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            // keep the suggestion as soon as one matching document is accessible
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(suggestion.string);
                                    break;
                                }
                            }
                        }
                    }

                    queue.add(new LuceneResultRow(suggestedWords));
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    // ACL filter suggestions
                    Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT,
                            indexNode.getDefinition().getAnalyzer());
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            // keep the suggestion as soon as one matching document is accessible
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
                                    suggestedWords.add(
                                            "{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
                                    break;
                                }
                            }
                        }
                    }

                    queue.add(new LuceneResultRow(suggestedWords));
                }
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        /**
         * Drops the paging offset when the index version differs from the one seen by the
         * previous batch, so that the next search starts from the beginning.
         */
        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = LucenePropertyIndex.getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(),
                        nonFullTextConstraints, indexNode.getDefinition());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    // counts the hits without collecting them
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LuceneIndex.this, e);
            } finally {
                indexNode.release();
            }
            // size unknown
            return -1;
        }
    };
    return new LucenePathCursor(itr, settings, sizeEstimator);
}