Usage examples for org.apache.lucene.index.LeafReader.maxDoc()
public abstract int maxDoc();
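maxDoc() returns one greater than the largest document ID in the reader, so valid document IDs run from 0 (inclusive) to maxDoc() (exclusive). Deleted documents are still counted, which is why the examples below size bitsets, arrays, and ordinal builders with maxDoc() rather than numDocs(). A minimal sketch of the canonical loop this enables (the visitLiveDocs helper name and the processing step are illustrative, not taken from the examples below):

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;

// Walk every document ID in a segment, skipping deletions.
// getLiveDocs() returns null when the segment has no deleted docs.
static void visitLiveDocs(LeafReader reader) throws IOException {
  final int maxDoc = reader.maxDoc();
  final Bits liveDocs = reader.getLiveDocs();
  for (int docID = 0; docID < maxDoc; docID++) {
    if (liveDocs != null && !liveDocs.get(docID)) {
      continue; // document was deleted
    }
    // process docID here, e.g. look up doc values for it
  }
}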
From source file:org.apache.solr.uninverting.DocTermOrds.java
License:Apache License
/** Returns a SortedSetDocValues view of this instance */
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
  if (isEmpty()) {
    return DocValues.emptySortedSet();
  } else {
    return new LegacySortedSetDocValuesWrapper(new Iterator(reader), reader.maxDoc());
  }
}
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
void setDocsWithField(LeafReader reader, String field, Bits docsWithField, Parser parser) {
  final int maxDoc = reader.maxDoc();
  final Bits bits;
  if (docsWithField == null) {
    bits = new Bits.MatchNoBits(maxDoc);
  } else if (docsWithField instanceof FixedBitSet) {
    final int numSet = ((FixedBitSet) docsWithField).cardinality();
    if (numSet >= maxDoc) {
      // The cardinality of the BitSet is maxDoc if all documents have a value.
      assert numSet == maxDoc;
      bits = new Bits.MatchAllBits(maxDoc);
    } else {
      bits = docsWithField;
    }
  } else {
    bits = docsWithField;
  }
  caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, parser), new BitsEntry(bits));
}
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
@Override
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException {
  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    // field does not exist or has no value
    return new Bits.MatchNoBits(reader.maxDoc());
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // doc values case
  } else if (parser instanceof PointParser) {
    // points case
  } else {
    // postings case
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
      return new Bits.MatchNoBits(reader.maxDoc());
    }
  }
  BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, parser));
  return bitsEntry.bits;
}
From source file:org.apache.solr.uninverting.TestDocTermOrds.java
License:Apache License
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
  final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
      TestUtil.nextInt(random(), 2, 10));
  final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
  /*
  for (int docID = 0; docID < subR.maxDoc(); docID++) {
    System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
  }
  */
  if (VERBOSE) {
    System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
    System.out.println("TEST: all TERMS:");
    TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
    int ord = 0;
    while (allTE.next() != null) {
      System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
    }
  }

  //final TermsEnum te = subR.fields().terms("field").iterator();
  final TermsEnum te = dto.getOrdTermsEnum(r);
  if (dto.numTerms() == 0) {
    if (prefixRef == null) {
      assertNull(MultiFields.getTerms(r, "field"));
    } else {
      Terms terms = MultiFields.getTerms(r, "field");
      if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
        if (result != TermsEnum.SeekStatus.END) {
          assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(),
              StringHelper.startsWith(termsEnum.term(), prefixRef));
        } else {
          // ok
        }
      } else {
        // ok
      }
    }
    return;
  }

  if (VERBOSE) {
    System.out.println("TEST: TERMS:");
    te.seekExact(0);
    while (true) {
      System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
      if (te.next() == null) {
        break;
      }
    }
  }

  SortedSetDocValues iter = dto.iterator(r);
  for (int docID = 0; docID < r.maxDoc(); docID++) {
    assertEquals(docID, docIDToID.nextDoc());
    if (docID > iter.docID()) {
      iter.nextDoc();
    }
    if (docID < iter.docID()) {
      int[] answers = idToOrds[(int) docIDToID.longValue()];
      assertEquals(0, answers.length);
      continue;
    }
    if (VERBOSE) {
      System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
    }
    final int[] answers = idToOrds[(int) docIDToID.longValue()];
    int upto = 0;
    long ord;
    while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      te.seekExact(ord);
      final BytesRef expected = termsArray[answers[upto++]];
      if (VERBOSE) {
        System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
      }
      assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord,
          expected, te.term());
    }
    assertEquals(answers.length, upto);
  }
}
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field indexedField = new StringField("indexed", "", Field.Store.NO);
  Field dvField = new SortedDocValuesField("dv", new BytesRef());
  doc.add(idField);
  doc.add(indexedField);
  doc.add(dvField);

  // index some docs
  int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    idField.setStringValue(Integer.toString(i));
    final int length;
    if (minLength == maxLength) {
      length = minLength; // fixed length
    } else {
      length = TestUtil.nextInt(random(), minLength, maxLength);
    }
    String value = TestUtil.randomSimpleString(random(), length);
    indexedField.setStringValue(value);
    dvField.setBytesValue(new BytesRef(value));
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  writer.close();

  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
    SortedDocValues actual = r.getSortedDocValues("dv");
    assertEquals(r.maxDoc(), expected, actual);
  }
  ir.close();
  dir.close();
}
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

  // index some docs
  int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
    doc.add(idField);
    final int length = TestUtil.nextInt(random(), minLength, maxLength);
    int numValues = random().nextInt(17);
    // create a random list of strings
    List<String> values = new ArrayList<>();
    for (int v = 0; v < numValues; v++) {
      values.add(TestUtil.randomSimpleString(random(), minLength, length));
    }
    // add in any order to the indexed field
    ArrayList<String> unordered = new ArrayList<>(values);
    Collections.shuffle(unordered, random());
    for (String v : unordered) {
      doc.add(newStringField("indexed", v, Field.Store.NO));
    }
    // add in any order to the dv field
    ArrayList<String> unordered2 = new ArrayList<>(values);
    Collections.shuffle(unordered2, random());
    for (String v : unordered2) {
      doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // compare per-segment
  DirectoryReader ir = writer.getReader();
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
    SortedSetDocValues actual = r.getSortedSetDocValues("dv");
    assertEquals(r.maxDoc(), expected, actual);
  }
  ir.close();

  writer.forceMerge(1);

  // now compare again after the merge
  ir = writer.getReader();
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
  SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
  assertEquals(ir.maxDoc(), expected, actual);
  ir.close();
  writer.close();
  dir.close();
}
From source file:org.apache.solr.uninverting.TestFieldCacheWithThreads.java
License:Apache License
public void test2() throws Exception {
  Random random = random();
  final int NUM_DOCS = atLeast(100);
  final Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  final boolean allowDups = random.nextBoolean();
  final Set<String> seen = new HashSet<>();
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
  }
  int numDocs = 0;
  final List<BytesRef> docValues = new ArrayList<>();

  // TODO: deletions
  while (numDocs < NUM_DOCS) {
    final String s;
    if (random.nextBoolean()) {
      s = TestUtil.randomSimpleString(random);
    } else {
      s = TestUtil.randomUnicodeString(random);
    }
    final BytesRef br = new BytesRef(s);

    if (!allowDups) {
      if (seen.contains(s)) {
        continue;
      }
      seen.add(s);
    }

    if (VERBOSE) {
      System.out.println("  " + numDocs + ": s=" + s);
    }

    final Document doc = new Document();
    doc.add(new SortedDocValuesField("stringdv", br));
    doc.add(new NumericDocValuesField("id", numDocs));
    docValues.add(br);
    writer.addDocument(doc);
    numDocs++;

    if (random.nextInt(40) == 17) {
      // force flush
      writer.getReader().close();
    }
  }

  writer.forceMerge(1);
  final DirectoryReader r = writer.getReader();
  writer.close();

  final LeafReader sr = getOnlyLeafReader(r);

  final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);

  final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
  Thread[] threads = new Thread[NUM_THREADS];
  for (int thread = 0; thread < NUM_THREADS; thread++) {
    threads[thread] = new Thread() {
      @Override
      public void run() {
        Random random = random();
        final SortedDocValues stringDVDirect;
        final NumericDocValues docIDToID;
        try {
          stringDVDirect = sr.getSortedDocValues("stringdv");
          docIDToID = sr.getNumericDocValues("id");
          assertNotNull(stringDVDirect);
        } catch (IOException ioe) {
          throw new RuntimeException(ioe);
        }
        int[] docIDToIDArray = new int[sr.maxDoc()];
        for (int i = 0; i < sr.maxDoc(); i++) {
          try {
            assertEquals(i, docIDToID.nextDoc());
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
          try {
            docIDToIDArray[i] = (int) docIDToID.longValue();
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }
        while (System.nanoTime() < END_TIME) {
          for (int iter = 0; iter < 100; iter++) {
            final int docID = random.nextInt(sr.maxDoc());
            try {
              SortedDocValues dvs = sr.getSortedDocValues("stringdv");
              assertEquals(docID, dvs.advance(docID));
              assertEquals(docValues.get(docIDToIDArray[docID]), dvs.binaryValue());
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
          }
        }
      }
    };
    threads[thread].start();
  }

  for (Thread thread : threads) {
    thread.join();
  }

  r.close();
  dir.close();
}
From source file:org.codelibs.elasticsearch.search.slice.TermsSliceQuery.java
License:Apache License
/** Returns a DocIdSet per segment containing the matching docs for the specified slice. */
private DocIdSet build(LeafReader reader) throws IOException {
  final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
  final Terms terms = reader.terms(getField());
  final TermsEnum te = terms.iterator();
  PostingsEnum docsEnum = null;
  for (BytesRef term = te.next(); term != null; term = te.next()) {
    int hashCode = term.hashCode();
    if (contains(hashCode)) {
      docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
      builder.add(docsEnum);
    }
  }
  return builder.build();
}
From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java
License:Apache License
@Override
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
  LeafReader reader = context.reader();
  Terms terms = reader.terms(getFieldNames().indexName());
  AtomicGeoPointFieldData data = null;
  // TODO: Use an actual estimator to estimate before loading.
  NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));
  if (terms == null) {
    data = AbstractAtomicGeoPointFieldData.empty(reader.maxDoc());
    estimator.afterLoad(null, data.ramBytesUsed());
    return data;
  }
  return (Version.indexCreated(indexSettings).before(Version.V_2_2_0))
      ? loadLegacyFieldData(reader, estimator, terms, data)
      : loadFieldData22(reader, estimator, terms, data);
}
From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java
License:Apache License
/**
 * long encoded geopoint field data
 */
private AtomicGeoPointFieldData loadFieldData22(LeafReader reader, NonEstimatingEstimator estimator, Terms terms,
    AtomicGeoPointFieldData data) throws Exception {
  LongArray indexedPoints = BigArrays.NON_RECYCLING_INSTANCE.newLongArray(128);
  final float acceptableTransientOverheadRatio = fieldDataType.getSettings()
      .getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
  boolean success = false;
  try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
    final TermsEnum termsEnum;
    final GeoPointField.TermEncoding termEncoding;
    if (Version.indexCreated(indexSettings).onOrAfter(Version.V_2_3_0)) {
      termEncoding = GeoPointField.TermEncoding.PREFIX;
      termsEnum = OrdinalsBuilder.wrapGeoPointTerms(terms.iterator());
    } else {
      termEncoding = GeoPointField.TermEncoding.NUMERIC;
      termsEnum = OrdinalsBuilder.wrapNumeric64Bit(terms.iterator());
    }
    final GeoPointTermsEnum iter = new GeoPointTermsEnum(builder.buildFromTerms(termsEnum), termEncoding);
    Long hashedPoint;
    long numTerms = 0;
    while ((hashedPoint = iter.next()) != null) {
      indexedPoints = BigArrays.NON_RECYCLING_INSTANCE.resize(indexedPoints, numTerms + 1);
      indexedPoints.set(numTerms++, hashedPoint);
    }
    indexedPoints = BigArrays.NON_RECYCLING_INSTANCE.resize(indexedPoints, numTerms);

    Ordinals build = builder.build(fieldDataType.getSettings());
    RandomAccessOrds ordinals = build.ordinals();
    if (!(FieldData.isMultiValued(ordinals)
        || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS)) {
      int maxDoc = reader.maxDoc();
      LongArray sIndexedPoint = BigArrays.NON_RECYCLING_INSTANCE.newLongArray(reader.maxDoc());
      for (int i = 0; i < maxDoc; ++i) {
        ordinals.setDocument(i);
        long nativeOrdinal = ordinals.nextOrd();
        if (nativeOrdinal != RandomAccessOrds.NO_MORE_ORDS) {
          sIndexedPoint.set(i, indexedPoints.get(nativeOrdinal));
        }
      }
      BitSet set = builder.buildDocsWithValuesSet();
      data = new GeoPointArrayAtomicFieldData.Single(sIndexedPoint, set);
    } else {
      data = new GeoPointArrayAtomicFieldData.WithOrdinals(indexedPoints, build, reader.maxDoc());
    }
    success = true;
    return data;
  } finally {
    if (success) {
      estimator.afterLoad(null, data.ramBytesUsed());
    }
  }
}