Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexReader method maxDoc().

Prototype

/** Returns one greater than the largest possible document number. */
public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:org.fao.geonet.kernel.search.SearchManager.java

License:Open Source License

/**
 * TODO javadoc./*from  w w  w. java  2 s .c  om*/
 *
 * @return
 * @throws Exception
 */
public Set<Integer> getDocsWithXLinks() throws Exception {
    IndexReader reader = getIndexReader(null);
    try {
        FieldSelector idXLinkSelector = new FieldSelector() {
            public final FieldSelectorResult accept(String name) {
                if (name.equals("_id") || name.equals("_hasxlinks"))
                    return FieldSelectorResult.LOAD;
                else
                    return FieldSelectorResult.NO_LOAD;
            }
        };

        Set<Integer> docs = new LinkedHashSet<Integer>();
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (reader.isDeleted(i))
                continue; // FIXME: strange lucene hack: sometimes it tries to load a deleted document
            Document doc = reader.document(i, idXLinkSelector);
            String id = doc.get("_id");
            String hasxlinks = doc.get("_hasxlinks");
            if (Log.isDebugEnabled(Geonet.INDEX_ENGINE))
                Log.debug(Geonet.INDEX_ENGINE, "Got id " + id + " : '" + hasxlinks + "'");
            if (id == null) {
                Log.error(Geonet.INDEX_ENGINE, "Document with no _id field skipped! Document is " + doc);
                continue;
            }
            if (hasxlinks.trim().equals("1")) {
                docs.add(new Integer(id));
            }
        }
        return docs;
    } finally {
        releaseIndexReader(reader);
    }
}

From source file:org.fao.geonet.kernel.search.SearchManager.java

License:Open Source License

/**
 * Builds a lookup from the {@code _id} of every non-deleted document in the
 * index to the value of its {@code _changeDate} field.
 *
 * @return map keyed by {@code _id}; the value is whatever the stored document
 *         returns for {@code _changeDate}
 * @throws Exception propagated from obtaining or reading the index reader
 */
public Map<String, String> getDocsChangeDate() throws Exception {
    final IndexReader indexReader = getIndexReader(null);
    try {
        // Restrict stored-field loading to the two fields used below.
        final FieldSelector idAndChangeDateOnly = new FieldSelector() {
            public final FieldSelectorResult accept(String fieldName) {
                final boolean wanted = fieldName.equals("_id") || fieldName.equals("_changeDate");
                return wanted ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
            }
        };

        // Size the map from maxDoc and a 0.75 ratio, as the original sizing did.
        final int initialCapacity = (int) (indexReader.maxDoc() / 0.75) + 1;
        final Map<String, String> changeDateById = new HashMap<String, String>(initialCapacity);

        for (int docNum = 0; docNum < indexReader.maxDoc(); docNum++) {
            // FIXME: strange lucene hack: sometimes it tries to load a deleted document
            if (!indexReader.isDeleted(docNum)) {
                final Document stored = indexReader.document(docNum, idAndChangeDateOnly);
                final String id = stored.get("_id");
                if (id != null) {
                    changeDateById.put(id, stored.get("_changeDate"));
                } else {
                    Log.error(Geonet.INDEX_ENGINE, "Document with no _id field skipped! Document is " + stored);
                }
            }
        }
        return changeDateById;
    } finally {
        // The reader is released whether or not the scan completed.
        releaseIndexReader(indexReader);
    }
}

From source file:org.fao.geonet.kernel.search.spatial.FullScanFilter.java

License:Open Source License

/**
 * Runs {@code _query} against the given reader and marks every hit whose
 * {@code _id} field is contained in the set returned by {@code loadMatches()}.
 *
 * @param reader the reader to search; stored documents are loaded from it
 * @return a bit set sized to {@code reader.maxDoc()} with one bit set per
 *         accepted document, indexed by {@code docBase + doc}
 * @throws IOException propagated from the search
 */
public BitSet bits(final IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());

    final Set<String> matches = loadMatches();

    new IndexSearcher(reader).search(_query, new Collector() {
        private int docBase;

        // ignore scorer
        public void setScorer(Scorer scorer) {
        }

        // accept docs out of order (for a BitSet it doesn't matter)
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public final void collect(int doc) {
            // 'doc' is relative to the current segment, while 'reader' here is the
            // reader passed to bits(); rebase once and use the same number both for
            // loading the stored document and for setting the bit.
            final int rebasedDoc = docBase + doc;
            try {
                Document document = reader.document(rebasedDoc, _selector);
                if (matches.contains(document.get("_id"))) {
                    bits.set(rebasedDoc);
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        public void setNextReader(IndexReader segmentReader, int docBase) {
            // Remember the offset of the segment about to be collected.
            this.docBase = docBase;
        }
    });
    return bits;
}

From source file:org.fao.geonet.kernel.search.spatial.SpatialFilter.java

License:Open Source License

/**
 * Runs {@code _query} against the given reader, keeps the hits whose
 * {@code _id} maps to a feature id in {@code unrefinedSpatialMatches()}, and
 * hands those candidates to {@code applySpatialFilter}.
 *
 * @param reader the reader to search; stored documents are loaded from it
 * @return an empty bit set sized to {@code reader.maxDoc()} when there are no
 *         unrefined matches or no hit maps to a feature id; otherwise the
 *         result of {@code applySpatialFilter}
 * @throws IOException propagated from the search
 */
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());

    final Map<String, FeatureId> unrefinedSpatialMatches = unrefinedSpatialMatches();
    final Set<FeatureId> matches = new HashSet<FeatureId>();
    final Multimap<FeatureId, Integer> docIndexLookup = HashMultimap.create();

    if (unrefinedSpatialMatches.isEmpty())
        return bits;

    new IndexSearcher(reader).search(_query, new Collector() {
        private int docBase;

        // ignore scorer
        public void setScorer(Scorer scorer) {
        }

        // accept docs out of order (for a BitSet it doesn't matter)
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            // 'doc' is relative to the current segment; rebase it exactly once.
            final int rebasedDoc = doc + docBase;
            try {
                Document document = reader.document(rebasedDoc, _selector);
                String key = document.get("_id");
                FeatureId featureId = unrefinedSpatialMatches.get(key);
                if (featureId != null) {
                    matches.add(featureId);
                    // Store the already rebased number; adding docBase again here
                    // would record a wrong document for every segment after the first.
                    docIndexLookup.put(featureId, rebasedDoc);
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        public void setNextReader(IndexReader segmentReader, int docBase) {
            // Remember the offset of the segment about to be collected.
            this.docBase = docBase;
        }
    });

    if (matches.isEmpty()) {
        return bits;
    } else {
        return applySpatialFilter(matches, docIndexLookup, bits);
    }
}

From source file:org.hibernate.search.filter.ChainedFilter.java

License:Open Source License

/**
 * Produces the doc id set for the chain of filters held in {@code chainedFilters}.
 * A single filter is delegated to directly; several filters have their sets
 * passed through {@code FilterOptimizationHelper.mergeByBitAnds} and, if more
 * than one set remains, wrapped in an {@code AndDocIdSet}.
 *
 * @param reader the reader each chained filter is evaluated against
 * @return the doc id set for the whole chain
 * @throws IOException propagated from the chained filters
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final int filterCount = chainedFilters.size();

    if (filterCount == 0) {
        throw new AssertionFailure("Chainedfilter has no filters to chain for");
    }
    if (filterCount == 1) {
        // Nothing to combine: the only filter answers on its own.
        return chainedFilters.get(0).getDocIdSet(reader);
    }

    final List<DocIdSet> perFilterSets = new ArrayList<DocIdSet>(filterCount);
    for (Filter chained : chainedFilters) {
        perFilterSets.add(chained.getDocIdSet(reader));
    }

    final List<DocIdSet> merged = FilterOptimizationHelper.mergeByBitAnds(perFilterSets);
    return merged.size() == 1
            ? merged.get(0)
            : new AndDocIdSet(merged, reader.maxDoc());
}

From source file:org.hibernate.search.filter.impl.ChainedFilter.java

License:Open Source License

/**
 * Produces the doc id set for the chain of filters held in {@code chainedFilters}.
 * With one filter the call is delegated; with several, the individual sets go
 * through {@code FilterOptimizationHelper.mergeByBitAnds} and are wrapped in an
 * {@code AndDocIdSet} unless the merge already reduced them to a single set.
 *
 * @param reader the reader each chained filter is evaluated against
 * @return the doc id set for the whole chain
 * @throws IOException propagated from the chained filters
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final int filterCount = chainedFilters.size();

    if (filterCount == 0) {
        throw new AssertionFailure("No filters to chain");
    }
    if (filterCount == 1) {
        // A lone filter needs no combination step.
        return chainedFilters.get(0).getDocIdSet(reader);
    }

    final List<DocIdSet> collected = new ArrayList<DocIdSet>(filterCount);
    for (Filter member : chainedFilters) {
        collected.add(member.getDocIdSet(reader));
    }

    final List<DocIdSet> reduced = FilterOptimizationHelper.mergeByBitAnds(collected);
    if (reduced.size() != 1) {
        return new AndDocIdSet(reduced, reader.maxDoc());
    }
    return reduced.get(0);
}

From source file:org.hibernate.search.spatial.impl.GridFilter.java

License:Open Source License

/**
 * Returns Doc Ids by searching the index for documents having the correct Grid Cell Id at the given grid level.
 *
 * @param reader reader to the index
 * @return a bit set with one bit per document found for any of the grid cell
 *         ids, or {@code null} when there are no grid cell ids or no document
 *         was found
 * @throws IOException propagated from reading the term documents
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    if (gridCellsIds.size() == 0) {
        return null;
    }

    OpenBitSet matchedDocumentsIds = new OpenBitSet(reader.maxDoc());
    boolean found = false;
    for (int i = 0; i < gridCellsIds.size(); i++) {
        Term gridCellTerm = new Term(fieldName, gridCellsIds.get(i));
        TermDocs gridCellsDocs = reader.termDocs(gridCellTerm);
        if (gridCellsDocs != null) {
            try {
                while (gridCellsDocs.next()) {
                    matchedDocumentsIds.fastSet(gridCellsDocs.doc());
                    found = true;
                }
            } finally {
                // Release the enumeration for this term before moving on to the next one.
                gridCellsDocs.close();
            }
        }
    }

    if (found) {
        return matchedDocumentsIds;
    } else {
        return null;
    }
}

From source file:org.hibernate.search.spatial.impl.QuadTreeFilter.java

License:Open Source License

/**
 * Returns Doc Ids by searching the index for documents having the correct Quad Tree Cell Id at the given quad tree level.
 *
 * @param reader reader to the index
 * @return a bit set with one bit per document found for any of the quad tree
 *         cell ids, or {@code null} when there are no cell ids or no document
 *         was found
 * @throws IOException propagated from reading the term documents
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    if (quadTreeCellsIds.size() == 0) {
        return null;
    }

    OpenBitSet matchedDocumentsIds = new OpenBitSet(reader.maxDoc());
    boolean found = false;
    for (int i = 0; i < quadTreeCellsIds.size(); i++) {
        Term quadTreeCellTerm = new Term(fieldName, quadTreeCellsIds.get(i));
        TermDocs quadTreeCellsDocs = reader.termDocs(quadTreeCellTerm);
        if (quadTreeCellsDocs != null) {
            try {
                while (quadTreeCellsDocs.next()) {
                    matchedDocumentsIds.fastSet(quadTreeCellsDocs.doc());
                    found = true;
                }
            } finally {
                // Release the enumeration for this term before moving on to the next one.
                quadTreeCellsDocs.close();
            }
        }
    }

    if (found) {
        return matchedDocumentsIds;
    } else {
        return null;
    }
}

From source file:org.hibernate.search.test.filter.BestDriversFilter.java

License:Open Source License

/**
 * Marks every document that contains the term {@code score:5}.
 *
 * @param reader reader whose term documents are enumerated
 * @return a bit set sized to {@code reader.maxDoc()} with one bit per matching document
 * @throws IOException propagated from reading the term documents
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    TermDocs termDocs = reader.termDocs(new Term("score", "5"));
    try {
        while (termDocs.next()) {
            bitSet.set(termDocs.doc());
        }
    } finally {
        // Release the enumeration even if iteration fails part-way.
        termDocs.close();
    }
    return bitSet;
}

From source file:org.hippoecm.repository.FacetedNavigationEngineImpl.java

License:Apache License

/**
 * Executes the given query against the reader, records every collected
 * document in a bit set, and returns the doc id set that a
 * {@code DocIdSetFilter} over those bits yields for the same reader.
 * The elapsed wall-clock time of the search is logged at info level.
 *
 * @param query the query to run
 * @param indexReader the reader to search
 * @return {@code null} when the query is a {@code BooleanQuery} without
 *         clauses (no constraints); otherwise the doc id set of the hits
 * @throws IOException propagated from the search
 */
private DocIdSet filterDocIdSetJackRabbitQuery(final org.apache.lucene.search.Query query,
        final IndexReader indexReader) throws IOException {
    final boolean unconstrained = (query instanceof BooleanQuery)
            && ((BooleanQuery) query).clauses().size() == 0;
    if (unconstrained) {
        // no constraints. Return null
        return null;
    }

    // TODO CACHE jackrabbit queries still need to be cached. Difficult parts are
    // 1: Jackrabbit Queries do not have hashCode or equals
    // 2: Jackrabbit Query implementations keep REFERENCES (!!) to index readers
    // 3: Jackrabbit creates a *NEW* JackrabbitIndexReader instance for *EVERY* search. Hence
    // if the reader is a JackrabbitIndexReader, the cache would be pointless.
    // Since all index readers in JR extend from FilterIndexReader, we can use
    // reader.getCoreCacheKey() : The FilterIndexReader delegates that call to the
    // wrapped index reader
    // TODO CACHE
    final OpenBitSet hits = new OpenBitSet(indexReader.maxDoc());
    final long startedAt = System.currentTimeMillis();
    final IndexSearcher searcher = new IndexSearcher(indexReader);
    searcher.search(query, new AbstractHitCollector() {
        @Override
        public final void collect(int doc, float score) {
            hits.set(doc); // set bit for hit
        }
    });
    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    log.info("Creating doc id set for Jackrabbit Query took {} ms.",
            String.valueOf(elapsedMillis));

    final DocIdSetFilter hitFilter = new DocIdSetFilter(hits);
    return hitFilter.getDocIdSet(indexReader);
}