Usage examples for org.apache.lucene.index.IndexReader#maxDoc()
public abstract int maxDoc();
From source file:org.elasticsearch.search.suggest.phrase.WordScorer.java
License:Apache License
/**
 * Creates a scorer over the given field's terms.
 *
 * @param reader             reader used as a fallback vocabulary-size source
 * @param terms              the terms of the target field; must not be {@code null}
 * @param field              name of the field this scorer operates on
 * @param realWordLikelyHood likelihood that a term present in the index is a real word
 * @param separator          separator inserted between concatenated terms
 * @throws IOException if the index cannot be read
 */
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
    this.field = field;
    if (terms == null) {
        throw new ElasticsearchIllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.terms = terms;
    final long vocSize = terms.getSumTotalTermFreq();
    // getSumTotalTermFreq() returns -1 when the statistic is unavailable;
    // fall back to maxDoc() as a rough vocabulary-size estimate.
    this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
    this.useTotalTermFreq = vocSize != -1;
    this.numTerms = terms.size();
    this.termsEnum = terms.iterator(null);
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;
}
From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java
License:Apache License
/**
 * Sanity-checks the given writer's index: runs {@link CheckIndex}, then
 * verifies every live document both by full document loading and via a
 * stored-field visitor — first with a sequential pass, then with 100
 * random probes.
 *
 * @param writer the index writer whose directory is verified
 * @throws Exception if the index is not clean or a document fails its checks
 */
private void verify(IndexWriter writer) throws Exception {
    CheckIndex checkIndex = new CheckIndex(writer.getDirectory());
    CheckIndex.Status status = checkIndex.checkIndex();
    assertThat(status.clean, equalTo(true));
    IndexReader reader = DirectoryReader.open(writer, true);
    final Bits liveDocs = MultiFields.getLiveDocs(reader);
    // Sequential pass over every live document.
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue;
        }
        Document document = reader.document(i);
        checkDoc(document);
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
        reader.document(i, visitor);
        document = visitor.getDocument();
        checkDoc(document);
    }
    // Random probes into the doc id space.
    for (int i = 0; i < 100; i++) {
        int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc());
        // BUG FIX: the live-docs check previously tested the loop counter
        // `i` instead of the randomly chosen `doc`, so a deleted document
        // could be loaded (throwing) and a live one could be skipped.
        if (liveDocs != null && !liveDocs.get(doc)) {
            continue;
        }
        Document document = reader.document(doc);
        checkDoc(document);
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
        reader.document(doc, visitor);
        document = visitor.getDocument();
        checkDoc(document);
    }
}
From source file:org.elasticsearch.util.lucene.docidset.DocIdSets.java
License:Apache License
/** * Returns a cacheable version of the doc id set (might be the same instance provided as a parameter). *//*from w w w . ja va2 s.c om*/ public static DocIdSet cacheable(IndexReader reader, DocIdSet docIdSet) throws IOException { if (docIdSet.isCacheable()) { return docIdSet; } else { final DocIdSetIterator it = docIdSet.iterator(); // null is allowed to be returned by iterator(), // in this case we wrap with the empty set, // which is cacheable. return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc()); } }
From source file:org.elasticsearch.util.lucene.search.TermFilter.java
License:Apache License
@Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { OpenBitSet result = new OpenBitSet(reader.maxDoc()); TermDocs td = reader.termDocs();// w ww . ja v a 2 s. c o m try { td.seek(term); while (td.next()) { result.set(td.doc()); } } finally { td.close(); } return result; }
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>.
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @param initCache if the {@link #parents} cache should be initialized
 *                  when this index reader is constructed.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache, boolean initCache) throws IOException {
    super(delegatee);
    this.cache = cache;
    this.parents = new DocId[delegatee.maxDoc()];
    this.shareableNodes = new BitSet();
    // Record every document that carries the shareable-node marker term.
    TermDocs tDocs = delegatee.termDocs(new Term(FieldNames.SHAREABLE_NODE, ""));
    try {
        while (tDocs.next()) {
            shareableNodes.set(tDocs.doc());
        }
    } finally {
        tDocs.close();
    }
    this.cacheInitializer = new CacheInitializer(delegatee);
    if (initCache) {
        // Synchronous initialization of the parents cache.
        cacheInitializer.run();
    }
    // limit cache to 1% of maxDoc(), but at least 10.
    this.docNumber2uuid = Collections.synchronizedMap(new LRUMap(Math.max(10, delegatee.maxDoc() / 100)));
    this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES);
}
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.MatchAllScorer.java
License:Apache License
/**
 * Creates a new MatchAllScorer.
 *
 * @param reader the IndexReader
 * @param field  the field name to match.
 * @throws IOException if an error occurs while collecting hits,
 *                     e.g. while reading from the search index.
 */
MatchAllScorer(IndexReader reader, String field) throws IOException {
    super(Similarity.getDefault());
    this.reader = reader;
    this.field = field;
    // idf(maxDoc, maxDoc) gives the constant score assigned to every match.
    matchExpl = new Explanation(Similarity.getDefault().idf(reader.maxDoc(), reader.maxDoc()), "matchAll");
    calculateDocFilter();
}
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.MatchAllWeight.java
License:Apache License
/** * {@inheritDoc}/*from www .j a v a 2 s . com*/ */ @Override public Explanation explain(IndexReader reader, int doc) throws IOException { return new Explanation(Similarity.getDefault().idf(reader.maxDoc(), reader.maxDoc()), "matchAll"); }
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex.java
License:Apache License
/** * {@inheritDoc}/*from w w w . j a va 2s .c o m*/ */ public void checkIndex(ItemDataConsumer itemStateManager, boolean isSystem, final InspectionReport report) throws RepositoryException, IOException { // The visitor, that performs item enumeration and checks if all nodes present in // persistent layer are indexed. Also collects the list of all indexed nodes // to optimize the process of backward check, when index is traversed to find // references to already deleted nodes class ItemDataIndexConsistencyVisitor extends ItemDataTraversingVisitor { private final IndexReader indexReader; private final Set<String> indexedNodes = new HashSet<String>(); /** * @param dataManager */ public ItemDataIndexConsistencyVisitor(ItemDataConsumer dataManager, IndexReader indexReader) { super(dataManager); this.indexReader = indexReader; } /** * {@inheritDoc} */ @Override protected void entering(PropertyData property, int level) throws RepositoryException { // ignore properties; } /** * {@inheritDoc} */ @Override protected void entering(NodeData node, int level) throws RepositoryException { // process node uuids one-by-one try { String uuid = node.getIdentifier(); TermDocs docs = indexReader.termDocs(new Term(FieldNames.UUID, uuid)); if (docs.next()) { indexedNodes.add(uuid); docs.doc(); if (docs.next()) { //multiple entries report.logComment("Multiple entires."); report.logBrokenObjectAndSetInconsistency("ID=" + uuid); } } else { report.logComment("Not indexed."); report.logBrokenObjectAndSetInconsistency("ID=" + uuid); } } catch (IOException e) { throw new RepositoryException(e.getMessage(), e); } } @Override protected void leaving(PropertyData property, int level) throws RepositoryException { // ignore properties } @Override protected void leaving(NodeData node, int level) throws RepositoryException { // do nothing } @Override protected void visitChildProperties(NodeData node) throws RepositoryException { //do nothing } public Set<String> getIndexedNodes() { return 
indexedNodes; } } // check relation Persistent Layer -> Index // If current workspace is system, then need to invoke reader correspondent to system index ensureFlushed(); if (isSystem) { if (getContext().getParentHandler() != null) { ((SearchIndex) getContext().getParentHandler()).ensureFlushed(); } } IndexReader indexReader = getIndexReader(isSystem); try { ItemData root = itemStateManager.getItemData(Constants.ROOT_UUID); ItemDataIndexConsistencyVisitor visitor = new ItemDataIndexConsistencyVisitor(itemStateManager, indexReader); root.accept(visitor); Set<String> documentUUIDs = visitor.getIndexedNodes(); // check relation Index -> Persistent Layer // find document that do not corresponds to real node // iterate on documents one-by-one for (int i = 0; i < indexReader.maxDoc(); i++) { if (indexReader.isDeleted(i)) { continue; } final int currentIndex = i; Document d = indexReader.document(currentIndex, FieldSelectors.UUID); String uuid = d.get(FieldNames.UUID); if (!documentUUIDs.contains(uuid)) { report.logComment("Document corresponds to removed node."); report.logBrokenObjectAndSetInconsistency("ID=" + uuid); } } } finally { Util.closeOrRelease(indexReader); } }
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SharedFieldCache.java
License:Apache License
/**
 * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 *
 * @param reader the <code>IndexReader</code>.
 * @param field  name of the shared field.
 * @param prefix the property name, will be used as term prefix.
 * @return a ValueIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix) throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        // unwrap so caching keys on the underlying reader
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    ValueIndex ret = lookup(reader, field, prefix);
    if (ret == null) {
        // one slot per document; entries stay null for docs without a value
        Comparable<?>[] retArray = new Comparable[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            // V3+ indexes encode the property type in a term payload
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            if (hasPayloads) {
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // == is valid here: field was interned above and Term
                    // fields are interned by Lucene
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            TermPositions termPos = (TermPositions) termDocs;
                            termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        retArray[termDocs.doc()] = getValue(value, type);
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        store(reader, field, prefix, value);
        return value;
    }
    return ret;
}
From source file:org.fao.geonet.kernel.search.DuplicateDocFilter.java
License:Open Source License
/**
 * Collects the first occurrence of each distinct "_id" value matching the
 * query, producing a doc id set with duplicates filtered out. Collection
 * stops adding ids once _maxResults hits have been gathered.
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());
    new IndexSearcher(reader).search(_query, new Collector() {
        // current segment's base offset; added to per-segment doc ids
        private int docBase;
        private IndexReader reader;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            // scores are not needed for duplicate detection
        }

        @Override
        public void collect(int doc) throws IOException {
            if (hits.size() <= _maxResults) {
                Document document;
                try {
                    document = reader.document(docBase + doc, _fieldSelector);
                    String id = document.get("_id");
                    // only the first document per id is kept
                    if (!hits.contains(id)) {
                        bits.set(docBase + doc);
                        hits.add(id);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            // track the active segment so collect() can resolve global doc ids
            this.docBase = docBase;
            this.reader = reader;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return false;
        }
    });
    return new DocIdBitSet(bits);
}