Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of org.apache.lucene.index IndexReader maxDoc.

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:org.elasticsearch.search.suggest.phrase.WordScorer.java

License:Apache License

/**
 * Creates a scorer over the terms of {@code field}.
 *
 * @param reader             reader used as a fallback source for the vocabulary size
 * @param terms              the field's terms; must not be null
 * @param field              the field name being scored
 * @param realWordLikelyHood likelihood that an indexed word is spelled correctly
 * @param separator          separator bytes used between terms (e.g. for bigram lookups)
 * @throws IOException if reading term statistics from the index fails
 */
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator)
        throws IOException {
    this.field = field;
    if (terms == null) {
        throw new ElasticsearchIllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.terms = terms;
    // getSumTotalTermFreq() returns -1 when the codec does not track it;
    // fall back to maxDoc() as a rough vocabulary-size estimate in that case.
    final long vocSize = terms.getSumTotalTermFreq();
    this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
    this.useTotalTermFreq = vocSize != -1;
    this.numTerms = terms.size();
    this.termsEnum = terms.iterator(null);
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;
}

From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java

License:Apache License

/**
 * Verifies that the index behind {@code writer} is clean and that every live
 * document round-trips through both read paths (full document load and
 * stored-field visitor).
 *
 * @param writer the writer whose directory is checked and read back
 * @throws Exception if the index is corrupt or a document fails verification
 */
private void verify(IndexWriter writer) throws Exception {
    CheckIndex checkIndex = new CheckIndex(writer.getDirectory());
    CheckIndex.Status status = checkIndex.checkIndex();
    assertThat(status.clean, equalTo(true));
    IndexReader reader = DirectoryReader.open(writer, true);
    try {
        final Bits liveDocs = MultiFields.getLiveDocs(reader);
        // Sequential pass: every live doc must be readable both ways.
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                continue;
            }
            Document document = reader.document(i);
            checkDoc(document);
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
            reader.document(i, visitor);
            document = visitor.getDocument();
            checkDoc(document);
        }
        // Random sampling pass over 100 draws.
        for (int i = 0; i < 100; i++) {
            int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc());
            // BUG FIX: the liveness check must test the randomly drawn doc id,
            // not the loop counter i — the old code could attempt to load a
            // deleted document (and wrongly skip a live one).
            if (liveDocs != null && !liveDocs.get(doc)) {
                continue;
            }
            Document document = reader.document(doc);
            checkDoc(document);
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
            reader.document(doc, visitor);
            document = visitor.getDocument();
            checkDoc(document);
        }
    } finally {
        // FIX: the reader was previously leaked.
        reader.close();
    }
}

From source file:org.elasticsearch.util.lucene.docidset.DocIdSets.java

License:Apache License

/**
 * Returns a cacheable version of the doc id set (might be the same instance
 * provided as a parameter).
 *
 * @param reader   reader whose maxDoc() bounds the materialized bit set
 * @param docIdSet the set to make cacheable
 * @return a cacheable DocIdSet equivalent to {@code docIdSet}
 * @throws IOException if iterating the set fails
 */
public static DocIdSet cacheable(IndexReader reader, DocIdSet docIdSet) throws IOException {
    if (docIdSet.isCacheable()) {
        return docIdSet;
    }
    final DocIdSetIterator iterator = docIdSet.iterator();
    // iterator() may legally return null; substitute the empty set,
    // which is cacheable.
    if (iterator == null) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    // Materialize into a bit set sized to the reader; bit sets are cacheable.
    return new OpenBitSetDISI(iterator, reader.maxDoc());
}

From source file:org.elasticsearch.util.lucene.search.TermFilter.java

License:Apache License

/**
 * Builds a bit set containing every document that holds {@code term}.
 *
 * @param reader the reader whose postings are scanned
 * @return an OpenBitSet with one bit per matching document
 * @throws IOException if term enumeration fails
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    final TermDocs termDocs = reader.termDocs();
    try {
        termDocs.seek(term);
        while (termDocs.next()) {
            bits.set(termDocs.doc());
        }
    } finally {
        // Always release the postings enumeration.
        termDocs.close();
    }
    return bits;
}

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.CachingIndexReader.java

License:Apache License

/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>.
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @param initCache if the {@link #parents} cache should be initialized
 *                  when this index reader is constructed.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache, boolean initCache) throws IOException {
    super(delegatee);
    this.cache = cache;
    // One slot per possible document number in the delegate reader.
    this.parents = new DocId[delegatee.maxDoc()];
    this.shareableNodes = new BitSet();
    // Pre-collect the doc numbers of all shareable nodes.
    TermDocs tDocs = delegatee.termDocs(new Term(FieldNames.SHAREABLE_NODE, ""));
    try {
        while (tDocs.next()) {
            shareableNodes.set(tDocs.doc());
        }
    } finally {
        tDocs.close();
    }
    this.cacheInitializer = new CacheInitializer(delegatee);
    if (initCache) {
        // NOTE(review): run() is invoked directly, so initialization happens
        // synchronously on the constructing thread here.
        cacheInitializer.run();
    }
    // limit cache to 1% of maxDoc(), but at least 10.
    this.docNumber2uuid = Collections.synchronizedMap(new LRUMap(Math.max(10, delegatee.maxDoc() / 100)));
    this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES);
}

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.MatchAllScorer.java

License:Apache License

/**
 * Creates a new MatchAllScorer.
 *
 * @param reader the IndexReader
 * @param field  the field name to match.
 * @throws IOException if an error occurs while collecting hits.
 *                     e.g. while reading from the search index.
 */
MatchAllScorer(IndexReader reader, String field) throws IOException {
    super(Similarity.getDefault());
    this.reader = reader;
    this.field = field;
    // idf(maxDoc, maxDoc) is the idf of a term present in every document —
    // used as the constant score explanation for "matchAll".
    matchExpl = new Explanation(Similarity.getDefault().idf(reader.maxDoc(), reader.maxDoc()), "matchAll");
    calculateDocFilter();
}

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.MatchAllWeight.java

License:Apache License

/**
 * {@inheritDoc}/*from   www .j  a  v  a 2 s  . com*/
 */
@Override
public Explanation explain(IndexReader reader, int doc) throws IOException {
    return new Explanation(Similarity.getDefault().idf(reader.maxDoc(), reader.maxDoc()), "matchAll");
}

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * Performs a two-way consistency check: (1) every node in the persistent
 * layer must be indexed, and (2) every indexed document must correspond to
 * an existing node. Findings are reported through {@code report}.
 */
public void checkIndex(ItemDataConsumer itemStateManager, boolean isSystem, final InspectionReport report)
        throws RepositoryException, IOException {

    // The visitor, that performs item enumeration and checks if all nodes present in 
    // persistent layer are indexed. Also collects the list of all indexed nodes
    // to optimize the process of backward check, when index is traversed to find
    // references to already deleted nodes
    class ItemDataIndexConsistencyVisitor extends ItemDataTraversingVisitor {
        private final IndexReader indexReader;

        // UUIDs of every node found in the index during the forward pass.
        private final Set<String> indexedNodes = new HashSet<String>();

        /**
         * @param dataManager the persistent-layer data source to traverse
         */
        public ItemDataIndexConsistencyVisitor(ItemDataConsumer dataManager, IndexReader indexReader) {
            super(dataManager);
            this.indexReader = indexReader;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(PropertyData property, int level) throws RepositoryException {
            // ignore properties;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(NodeData node, int level) throws RepositoryException {
            // process node uuids one-by-one
            try {
                String uuid = node.getIdentifier();
                TermDocs docs = indexReader.termDocs(new Term(FieldNames.UUID, uuid));

                if (docs.next()) {
                    indexedNodes.add(uuid);
                    docs.doc();
                    if (docs.next()) {
                        //multiple entries
                        report.logComment("Multiple entires.");
                        report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                    }
                } else {
                    // node exists in the persistent layer but not in the index
                    report.logComment("Not indexed.");
                    report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                }
            } catch (IOException e) {
                throw new RepositoryException(e.getMessage(), e);
            }
        }

        @Override
        protected void leaving(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        @Override
        protected void leaving(NodeData node, int level) throws RepositoryException {
            // do nothing
        }

        @Override
        protected void visitChildProperties(NodeData node) throws RepositoryException {
            //do nothing
        }

        public Set<String> getIndexedNodes() {
            return indexedNodes;
        }
    }

    // check relation Persistent Layer -> Index
    // If current workspace is system, then need to invoke reader correspondent to system index
    ensureFlushed();
    if (isSystem) {
        if (getContext().getParentHandler() != null) {
            ((SearchIndex) getContext().getParentHandler()).ensureFlushed();
        }
    }
    IndexReader indexReader = getIndexReader(isSystem);
    try {
        ItemData root = itemStateManager.getItemData(Constants.ROOT_UUID);
        ItemDataIndexConsistencyVisitor visitor = new ItemDataIndexConsistencyVisitor(itemStateManager,
                indexReader);
        root.accept(visitor);

        Set<String> documentUUIDs = visitor.getIndexedNodes();

        // check relation Index -> Persistent Layer
        // find document that do not corresponds to real node
        // iterate on documents one-by-one
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            // skip documents already deleted from the index
            if (indexReader.isDeleted(i)) {
                continue;
            }
            final int currentIndex = i;
            Document d = indexReader.document(currentIndex, FieldSelectors.UUID);
            String uuid = d.get(FieldNames.UUID);
            if (!documentUUIDs.contains(uuid)) {
                report.logComment("Document corresponds to removed node.");
                report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
            }
        }
    } finally {
        Util.closeOrRelease(indexReader);
    }
}

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SharedFieldCache.java

License:Apache License

/**
 * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 *
 * @param reader     the <code>IndexReader</code>.
 * @param field      name of the shared field.
 * @param prefix     the property name, will be used as term prefix.
 * @return a ValueIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix) throws IOException {

    // unwrap so caching is keyed on the underlying reader
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }

    // interned so the reference comparison in the loop below is valid
    field = field.intern();
    ValueIndex ret = lookup(reader, field, prefix);
    if (ret == null) {
        // one value slot per document number
        Comparable<?>[] retArray = new Comparable[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            // V3+ stores the property type in a term payload
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            if (hasPayloads) {
                // need positions to access payloads
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));

            // reusable scratch buffer for stripping the prefix off term text
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // '!=' is intentional: field names are interned (see above),
                    // so reference inequality means a different field.
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }

                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);

                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            // the payload (if present) encodes the property type
                            TermPositions termPos = (TermPositions) termDocs;
                            termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        retArray[termDocs.doc()] = getValue(value, type);
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        // cache for subsequent lookups on the same reader/field/prefix
        store(reader, field, prefix, value);
        return value;
    }
    return ret;
}

From source file:org.fao.geonet.kernel.search.DuplicateDocFilter.java

License:Open Source License

/**
 * Builds a doc id set containing, for each distinct "_id" value matched by
 * {@code _query}, only the first document encountered — i.e. duplicates
 * (by "_id") are filtered out.
 *
 * @param reader the top-level reader the search runs against
 * @return a DocIdBitSet of the de-duplicated matches
 * @throws IOException if the search fails
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());

    new IndexSearcher(reader).search(_query, new Collector() {

        // docBase maps per-segment doc ids to top-level ids
        private int docBase;
        private IndexReader reader;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
        }

        @Override
        public void collect(int doc) throws IOException {
            // NOTE(review): '<=' admits up to _maxResults + 1 distinct ids —
            // confirm whether the bound is meant to be inclusive.
            if (hits.size() <= _maxResults) {
                Document document;
                try {
                    document = reader.document(docBase + doc, _fieldSelector);
                    String id = document.get("_id");

                    // keep only the first document seen for each "_id"
                    if (!hits.contains(id)) {
                        bits.set(docBase + doc);
                        hits.add(id);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
            this.reader = reader;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return false;
        }
    });

    return new DocIdBitSet(bits);
}