List of usage examples for org.apache.lucene.index LeafReader getSortedSetDocValues
public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;
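Before the collected examples, a minimal usage sketch for orientation. This is not taken from any of the sources below: the DirectoryReader argument, the field name "category", and the wrapper class are illustrative assumptions, and it uses the standard Lucene 7+ doc-values iterator calls (nextDoc, nextOrd, lookupOrd).

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public class SortedSetDocValuesExample {
    // Walks every segment of the (hypothetical) reader and prints each document's
    // deduplicated, sorted set of values for the "category" field.
    static void dumpCategories(DirectoryReader reader) throws IOException {
        for (LeafReaderContext ctx : reader.leaves()) {
            LeafReader leaf = ctx.reader();
            // Returns null when this segment has no SORTED_SET doc values for the field.
            SortedSetDocValues dv = leaf.getSortedSetDocValues("category");
            if (dv == null) {
                continue;
            }
            // The iterator visits only documents that actually have a value.
            for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
                long ord;
                while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                    BytesRef value = dv.lookupOrd(ord); // ordinal -> term bytes
                    System.out.println("doc " + (ctx.docBase + doc) + ": " + value.utf8ToString());
                }
            }
        }
    }
}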
From source file: org.apache.solr.uninverting.TestUninvertingReader.java
License: Apache License
public void testSortedSetDouble() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
    doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
            Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
    LeafReader ar = ir.leaves().get(0).reader();
    SortedSetDocValues v = ar.getSortedSetDocValues("foo");
    assertEquals(2, v.getValueCount());

    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));
    value = v.lookupOrd(1);
    assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));

    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
}
From source file: org.apache.solr.uninverting.TestUninvertingReader.java
License: Apache License
/** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
public void testSortedSetIntegerManyValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    final LegacyFieldType NO_TRIE_TYPE = new LegacyFieldType(LegacyIntField.TYPE_NOT_STORED);
    NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);

    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);

    final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
    MULTI_VALUES.add("trie_multi");
    MULTI_VALUES.add("notrie_multi");

    final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
    final int MIN = TestUtil.nextInt(random(), 10, 100);
    final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
    final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;

    { // (at least) one doc should have every value, so that at least one segment has every value
        final Document doc = new Document();
        for (int i = MIN; i <= MAX; i++) {
            doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
        }
        iw.addDocument(doc);
    }

    // now add some more random docs (note: starting at i=1 because of previously added doc)
    for (int i = 1; i < NUM_DOCS; i++) {
        final Document doc = new Document();
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int val = TestUtil.nextInt(random(), MIN, MAX);
            doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
        }
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int numMulti = atLeast(1);
            while (0 < numMulti--) {
                int val = TestUtil.nextInt(random(), MIN, MAX);
                doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
                doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
            }
        }
        iw.addDocument(doc);
    }
    iw.close();

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);
    final int NUM_LEAVES = ir.leaves().size();

    // check the leaves: no more than total set size
    for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader ar = rc.reader();
        for (String f : UNINVERT_MAP.keySet()) {
            final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
            final long valSetSize = v.getValueCount();
            assertTrue(f + ": Expected no more than " + EXPECTED_VALSET_SIZE + " values per segment, got "
                    + valSetSize + " from: " + ar.toString(), valSetSize <= EXPECTED_VALSET_SIZE);
            if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
                // tighter check on multi fields in single segment index since we know one doc has all of them
                assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
                        EXPECTED_VALSET_SIZE, valSetSize);
            }
        }
    }

    // check the composite of all leaves: exact expectation of set size
    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);
    for (String f : MULTI_VALUES) {
        final SortedSetDocValues v = composite.getSortedSetDocValues(f);
        final long valSetSize = v.getValueCount();
        assertEquals(f + ": Composite reader value set should have had exactly expected size",
                EXPECTED_VALSET_SIZE, valSetSize);
    }

    ir.close();
    dir.close();
}
From source file: org.apache.solr.uninverting.TestUninvertingReader.java
License: Apache License
public void testSortedSetEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    iw.close();

    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    for (Type t : EnumSet.allOf(Type.class)) {
        UNINVERT_MAP.put(t.name(), t);
    }

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : UNINVERT_MAP.keySet()) {
        // check the leaves
        // (normally there are none for an empty index, so this is really just future
        // proofing in case that changes for some reason)
        for (LeafReaderContext rc : ir.leaves()) {
            final LeafReader ar = rc.reader();
            assertNull(f + ": Expected no doc values from empty index (leaf)", ar.getSortedSetDocValues(f));
        }

        // check the composite
        assertNull(f + ": Expected no doc values from empty index (composite)", composite.getSortedSetDocValues(f));
    }

    ir.close();
    dir.close();
}
From source file: org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java
License: Apache License
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
            if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
                map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    assert map != null;
    int size = in.leaves().size();
    final SortedSetDocValues[] values = new SortedSetDocValues[size];
    final int[] starts = new int[size + 1];
    long cost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
            return null;
        }
        SortedSetDocValues v = reader.getSortedSetDocValues(field);
        if (v == null) {
            v = DocValues.emptySortedSet();
        }
        values[i] = v;
        starts[i] = context.docBase;
        cost += v.cost();
    }
    starts[size] = maxDoc();
    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
}
From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License: Open Source License
/**
 * test filtering two sortedset dv fields
 */
public void testSortedSetDocValues() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new SortedSetDocValuesField("fieldA", new BytesRef("testA")));
    doc.add(new SortedSetDocValuesField("fieldB", new BytesRef("testB")));
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    SortedSetDocValues dv = segmentReader.getSortedSetDocValues("fieldA");
    assertNotNull(dv);
    assertTrue(dv.advanceExact(0));
    assertEquals(0, dv.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    assertEquals(new BytesRef("testA"), dv.lookupOrd(0));
    assertNull(segmentReader.getSortedSetDocValues("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}
From source file: suonos.lucene.fields.IndexedFieldCountsBuilder.java
License: Apache License
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {
    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final TIntIntHashMap ordCounts = new TIntIntHashMap();

    if (filter != null) {
        filter = filter.toLowerCase();
    }

    // Get count of segments.
    int sz = ir.leaves().size();

    for (int i = 0; i != sz; i++) {
        // Get the segment reader.
        LeafReader lr = ir.leaves().get(i).reader();

        // Doc count for the field, e.g. "album_genres".
        lr.getDocCount(fld.getName());

        // Get all documents that have the field "album_genres".
        Bits docs = lr.getDocsWithField(fld.getName());
        ordCounts.clear();

        // Enumerate the field terms.
        if (fld.isDocValues()) {
            if (fld.isMultiValue()) {
                // A doc-values, multi-valued field is a SortedSetDocValues.
                // Per-document values in a SortedSetDocValues are deduplicated,
                // dereferenced, and sorted into a dictionary of unique values.
                // A pointer to the dictionary value (ordinal) can be retrieved
                // for each document. Ordinals are dense and in increasing sorted order.
                SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());
                if (set != null) {
                    // For all documents that have the field "album_genres":
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Enumerate the set of terms of "album_genres" for the
                            // document represented by docId; each ord represents a term value.
                            set.setDocument(docId);

                            // For each term, bump up the frequency.
                            long ord;
                            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                                ordCounts.adjustOrPutValue((int) ord, 1, 1);
                                System.out.println("term=" + set.lookupOrd(ord).utf8ToString());
                            }
                        }
                    }

                    TermsEnum te = set.termsEnum();
                    BytesRef term;
                    while ((term = te.next()) != null) {
                        int ord = (int) te.ord();
                        add(fld, valuesMap, filter, term, ordCounts.get(ord));
                    }
                }
            } else {
                SortedDocValues set = lr.getSortedDocValues(fld.getName());
                if (set != null) {
                    // For all documents that have the field "album_genres":
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Get the term - Classical, Rock, etc.
                            BytesRef term = set.get(docId);
                            add(fld, valuesMap, filter, term, 1);
                        }
                    }
                }
            }
        } else {
            // Normal indexed field, not a doc value.
            Terms terms = lr.terms(fld.getName());
            TermsEnum te = terms.iterator();
            BytesRef term;
            while ((term = te.next()) != null) {
                add(fld, valuesMap, filter, term, te.docFreq());
            }
        }

        /*
         * SORTED example:
         *   doc[0] = "aardvark"  doc[1] = "beaver"  doc[2] = "aardvark"
         *
         *   doc[0] = 0  doc[1] = 1  doc[2] = 0
         *
         *   term[0] = "aardvark"  term[1] = "beaver"
         */

        // http://127.0.0.1:8080/api/facets?fields=track_title_a
        // The above should return B:(4) because there are 4 titles starting with B.
    }

    // Get the array of term counters.
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);

    // Sort by term.
    Arrays.sort(list);

    // Add to the map.
    this.fieldCounts.put(fld.getName(), list);

    return this;
}