Usage examples for org.apache.lucene.index.IndexReader#maxDoc()
public abstract int maxDoc();
From source file:org.elasticsearch.search.suggest.phrase.WordScorer.java
License:Apache License
/**
 * Creates a scorer over the given field's terms.
 *
 * @param reader             reader used as a fallback vocabulary-size source
 * @param terms              the terms of the target field; must not be {@code null}
 * @param field              name of the field this scorer operates on
 * @param realWordLikelyHood likelihood that a term present in the index is a real word
 * @param separator          separator inserted between concatenated terms
 * @throws IOException if the index cannot be read
 */
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
    this.field = field;
    if (terms == null) {
        throw new ElasticsearchIllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.terms = terms;
    final long vocSize = terms.getSumTotalTermFreq();
    // getSumTotalTermFreq() returns -1 when the statistic is unavailable;
    // fall back to maxDoc() as a rough vocabulary-size estimate.
    this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
    this.useTotalTermFreq = vocSize != -1;
    this.numTerms = terms.size();
    this.termsEnum = terms.iterator(null);
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;
}
From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java
License:Apache License
/**
 * Sanity-checks the given writer's index: runs {@link CheckIndex}, then
 * verifies every live document both by full document loading and via a
 * stored-field visitor — first with a sequential pass, then with 100
 * random probes.
 *
 * @param writer the index writer whose directory is verified
 * @throws Exception if the index is not clean or a document fails its checks
 */
private void verify(IndexWriter writer) throws Exception {
    CheckIndex checkIndex = new CheckIndex(writer.getDirectory());
    CheckIndex.Status status = checkIndex.checkIndex();
    assertThat(status.clean, equalTo(true));
    IndexReader reader = DirectoryReader.open(writer, true);
    final Bits liveDocs = MultiFields.getLiveDocs(reader);
    // Sequential pass over every live document.
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue;
        }
        Document document = reader.document(i);
        checkDoc(document);
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
        reader.document(i, visitor);
        document = visitor.getDocument();
        checkDoc(document);
    }
    // Random probes into the doc id space.
    for (int i = 0; i < 100; i++) {
        int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc());
        // BUG FIX: the live-docs check previously tested the loop counter
        // `i` instead of the randomly chosen `doc`, so a deleted document
        // could be loaded (throwing) and a live one could be skipped.
        if (liveDocs != null && !liveDocs.get(doc)) {
            continue;
        }
        Document document = reader.document(doc);
        checkDoc(document);
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
        reader.document(doc, visitor);
        document = visitor.getDocument();
        checkDoc(document);
    }
}
From source file:org.elasticsearch.util.lucene.docidset.DocIdSets.java
License:Apache License
/** * Returns a cacheable version of the doc id set (might be the same instance provided as a parameter). *//*from w w w . ja va2 s.c om*/ public static DocIdSet cacheable(IndexReader reader, DocIdSet docIdSet) throws IOException { if (docIdSet.isCacheable()) { return docIdSet; } else { final DocIdSetIterator it = docIdSet.iterator(); // null is allowed to be returned by iterator(), // in this case we wrap with the empty set, // which is cacheable. return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc()); } }
From source file:org.elasticsearch.util.lucene.search.TermFilter.java
License:Apache License
@Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { OpenBitSet result = new OpenBitSet(reader.maxDoc()); TermDocs td = reader.termDocs();// w ww . ja v a 2 s. c o m try { td.seek(term); while (td.next()) { result.set(td.doc()); } } finally { td.close(); } return result; }
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>.
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @param initCache if the {@link #parents} cache should be initialized
 *                  when this index reader is constructed.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache, boolean initCache) throws IOException {
    super(delegatee);
    this.cache = cache;
    this.parents = new DocId[delegatee.maxDoc()];
    this.shareableNodes = new BitSet();
    // Record every document that carries the shareable-node marker term.
    TermDocs tDocs = delegatee.termDocs(new Term(FieldNames.SHAREABLE_NODE, ""));
    try {
        while (tDocs.next()) {
            shareableNodes.set(tDocs.doc());
        }
    } finally {
        tDocs.close();
    }
    this.cacheInitializer = new CacheInitializer(delegatee);
    if (initCache) {
        // Synchronous initialization of the parents cache.
        cacheInitializer.run();
    }
    // limit cache to 1% of maxDoc(), but at least 10.
    this.docNumber2uuid = Collections.synchronizedMap(new LRUMap(Math.max(10, delegatee.maxDoc() / 100)));
    this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES);
}
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.MatchAllScorer.java
License:Apache License
/**
 * Creates a new MatchAllScorer.
 *
 * @param reader the IndexReader
 * @param field  the field name to match.
 * @throws IOException if an error occurs while collecting hits,
 *                     e.g. while reading from the search index.
 */
MatchAllScorer(IndexReader reader, String field) throws IOException {
    super(Similarity.getDefault());
    this.reader = reader;
    this.field = field;
    // idf(maxDoc, maxDoc) gives the constant score assigned to every match.
    matchExpl = new Explanation(Similarity.getDefault().idf(reader.maxDoc(), reader.maxDoc()), "matchAll");
    calculateDocFilter();
}
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.MatchAllWeight.java
License:Apache License
/** * {@inheritDoc}/*from www .j a v a 2 s . com*/ */ @Override public Explanation explain(IndexReader reader, int doc) throws IOException { return new Explanation(Similarity.getDefault().idf(reader.maxDoc(), reader.maxDoc()), "matchAll"); }
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex.java
License:Apache License
/** * {@inheritDoc}/*from w w w . j a va 2s .c o m*/ */ public void checkIndex(ItemDataConsumer itemStateManager, boolean isSystem, final InspectionReport report) throws RepositoryException, IOException { // The visitor, that performs item enumeration and checks if all nodes present in // persistent layer are indexed. Also collects the list of all indexed nodes // to optimize the process of backward check, when index is traversed to find // references to already deleted nodes class ItemDataIndexConsistencyVisitor extends ItemDataTraversingVisitor { private final IndexReader indexReader; private final Set<String> indexedNodes = new HashSet<String>(); /** * @param dataManager */ public ItemDataIndexConsistencyVisitor(ItemDataConsumer dataManager, IndexReader indexReader) { super(dataManager); this.indexReader = indexReader; } /** * {@inheritDoc} */ @Override protected void entering(PropertyData property, int level) throws RepositoryException { // ignore properties; } /** * {@inheritDoc} */ @Override protected void entering(NodeData node, int level) throws RepositoryException { // process node uuids one-by-one try { String uuid = node.getIdentifier(); TermDocs docs = indexReader.termDocs(new Term(FieldNames.UUID, uuid)); if (docs.next()) { indexedNodes.add(uuid); docs.doc(); if (docs.next()) { //multiple entries report.logComment("Multiple entires."); report.logBrokenObjectAndSetInconsistency("ID=" + uuid); } } else { report.logComment("Not indexed."); report.logBrokenObjectAndSetInconsistency("ID=" + uuid); } } catch (IOException e) { throw new RepositoryException(e.getMessage(), e); } } @Override protected void leaving(PropertyData property, int level) throws RepositoryException { // ignore properties } @Override protected void leaving(NodeData node, int level) throws RepositoryException { // do nothing } @Override protected void visitChildProperties(NodeData node) throws RepositoryException { //do nothing } public Set<String> getIndexedNodes() { return 
indexedNodes; } } // check relation Persistent Layer -> Index // If current workspace is system, then need to invoke reader correspondent to system index ensureFlushed(); if (isSystem) { if (getContext().getParentHandler() != null) { ((SearchIndex) getContext().getParentHandler()).ensureFlushed(); } } IndexReader indexReader = getIndexReader(isSystem); try { ItemData root = itemStateManager.getItemData(Constants.ROOT_UUID); ItemDataIndexConsistencyVisitor visitor = new ItemDataIndexConsistencyVisitor(itemStateManager, indexReader); root.accept(visitor); Set<String> documentUUIDs = visitor.getIndexedNodes(); // check relation Index -> Persistent Layer // find document that do not corresponds to real node // iterate on documents one-by-one for (int i = 0; i < indexReader.maxDoc(); i++) { if (indexReader.isDeleted(i)) { continue; } final int currentIndex = i; Document d = indexReader.document(currentIndex, FieldSelectors.UUID); String uuid = d.get(FieldNames.UUID); if (!documentUUIDs.contains(uuid)) { report.logComment("Document corresponds to removed node."); report.logBrokenObjectAndSetInconsistency("ID=" + uuid); } } } finally { Util.closeOrRelease(indexReader); } }
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SharedFieldCache.java
License:Apache License
/**
 * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 *
 * @param reader the <code>IndexReader</code>.
 * @param field  name of the shared field.
 * @param prefix the property name, will be used as term prefix.
 * @return a ValueIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix) throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        // unwrap so caching keys on the underlying reader
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    ValueIndex ret = lookup(reader, field, prefix);
    if (ret == null) {
        // one slot per document; entries stay null for docs without a value
        Comparable<?>[] retArray = new Comparable[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            // V3+ indexes encode the property type in a term payload
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            if (hasPayloads) {
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // == is valid here: field was interned above and Term
                    // fields are interned by Lucene
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            TermPositions termPos = (TermPositions) termDocs;
                            termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        retArray[termDocs.doc()] = getValue(value, type);
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        store(reader, field, prefix, value);
        return value;
    }
    return ret;
}
From source file:org.fao.geonet.kernel.search.DuplicateDocFilter.java
License:Open Source License
/**
 * Collects the first occurrence of each distinct "_id" value matching the
 * query, producing a doc id set with duplicates filtered out. Collection
 * stops adding ids once _maxResults hits have been gathered.
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());
    new IndexSearcher(reader).search(_query, new Collector() {
        // current segment's base offset; added to per-segment doc ids
        private int docBase;
        private IndexReader reader;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            // scores are not needed for duplicate detection
        }

        @Override
        public void collect(int doc) throws IOException {
            if (hits.size() <= _maxResults) {
                Document document;
                try {
                    document = reader.document(docBase + doc, _fieldSelector);
                    String id = document.get("_id");
                    // only the first document per id is kept
                    if (!hits.contains(id)) {
                        bits.set(docBase + doc);
                        hits.add(id);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            // track the active segment so collect() can resolve global doc ids
            this.docBase = docBase;
            this.reader = reader;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return false;
        }
    });
    return new DocIdBitSet(bits);
}