Usage examples for org.apache.lucene.index.LeafReader#getSortedDocValues
public abstract SortedDocValues getSortedDocValues(String field) throws IOException;
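The method is declared on LeafReader, so callers typically iterate a DirectoryReader's leaves and ask each segment reader for the field's values, handling the null return when a segment has no documents with that field. The snippet below is a minimal sketch of that pattern against the Lucene 7+ iterator-style SortedDocValues API; the index path "/tmp/index" and the field name "category" are placeholders, not taken from the examples that follow.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class SortedDocValuesExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical index location and field name, for illustration only.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            for (LeafReaderContext context : reader.leaves()) {
                LeafReader leaf = context.reader();
                // Returns null if no document in this segment has the field.
                SortedDocValues values = leaf.getSortedDocValues("category");
                if (values == null) {
                    continue;
                }
                for (int docId = 0; docId < leaf.maxDoc(); docId++) {
                    // advanceExact reports whether this document has a value.
                    if (values.advanceExact(docId)) {
                        BytesRef term = values.lookupOrd(values.ordValue());
                        System.out.println(docId + " -> " + term.utf8ToString());
                    }
                }
            }
        }
    }
}

Note that some of the examples below use the older pre-7.0 random-access API (e.g. SortedDocValues.get(int)), while others use the iterator-style advanceExact/ordValue calls shown here.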
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field indexedField = new StringField("indexed", "", Field.Store.NO);
    Field dvField = new SortedDocValuesField("dv", new BytesRef());
    doc.add(idField);
    doc.add(indexedField);
    doc.add(dvField);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        idField.setStringValue(Integer.toString(i));
        final int length;
        if (minLength == maxLength) {
            length = minLength; // fixed length
        } else {
            length = TestUtil.nextInt(random(), minLength, maxLength);
        }
        String value = TestUtil.randomSimpleString(random(), length);
        indexedField.setStringValue(value);
        dvField.setBytesValue(new BytesRef(value));
        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    writer.close();

    // compare
    DirectoryReader ir = DirectoryReader.open(dir);
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
        SortedDocValues actual = r.getSortedDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();
    dir.close();
}
From source file:org.apache.solr.uninverting.TestFieldCacheWithThreads.java
License:Apache License
public void test2() throws Exception {
    Random random = random();
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();

    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final String s;
        if (random.nextBoolean()) {
            s = TestUtil.randomSimpleString(random);
        } else {
            s = TestUtil.randomUnicodeString(random);
        }
        final BytesRef br = new BytesRef(s);

        if (!allowDups) {
            if (seen.contains(s)) {
                continue;
            }
            seen.add(s);
        }

        if (VERBOSE) {
            System.out.println("  " + numDocs + ": s=" + s);
        }

        final Document doc = new Document();
        doc.add(new SortedDocValuesField("stringdv", br));
        doc.add(new NumericDocValuesField("id", numDocs));
        docValues.add(br);
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    writer.forceMerge(1);
    final DirectoryReader r = writer.getReader();
    writer.close();

    final LeafReader sr = getOnlyLeafReader(r);

    final long END_TIME = System.nanoTime()
            + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);

    final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
    Thread[] threads = new Thread[NUM_THREADS];
    for (int thread = 0; thread < NUM_THREADS; thread++) {
        threads[thread] = new Thread() {
            @Override
            public void run() {
                Random random = random();
                final SortedDocValues stringDVDirect;
                final NumericDocValues docIDToID;
                try {
                    stringDVDirect = sr.getSortedDocValues("stringdv");
                    docIDToID = sr.getNumericDocValues("id");
                    assertNotNull(stringDVDirect);
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }
                int[] docIDToIDArray = new int[sr.maxDoc()];
                for (int i = 0; i < sr.maxDoc(); i++) {
                    try {
                        assertEquals(i, docIDToID.nextDoc());
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                    try {
                        docIDToIDArray[i] = (int) docIDToID.longValue();
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
                while (System.nanoTime() < END_TIME) {
                    for (int iter = 0; iter < 100; iter++) {
                        final int docID = random.nextInt(sr.maxDoc());
                        try {
                            SortedDocValues dvs = sr.getSortedDocValues("stringdv");
                            assertEquals(docID, dvs.advance(docID));
                            assertEquals(docValues.get(docIDToIDArray[docID]), dvs.binaryValue());
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            }
        };
        threads[thread].start();
    }

    for (Thread thread : threads) {
        thread.join();
    }

    r.close();
    dir.close();
}
From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java
License:Apache License
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
            if (dv instanceof MultiSortedDocValues) {
                map = ((MultiSortedDocValues) dv).mapping;
                IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
                if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    int size = in.leaves().size();
    final SortedDocValues[] values = new SortedDocValues[size];
    final int[] starts = new int[size + 1];
    long totalCost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
            return null;
        }
        SortedDocValues v = reader.getSortedDocValues(field);
        if (v == null) {
            v = DocValues.emptySorted();
        }
        totalCost += v.cost();
        values[i] = v;
        starts[i] = context.docBase;
    }
    starts[size] = maxDoc();
    return new MultiSortedDocValues(values, starts, map, totalCost);
}
From source file:org.elasticsearch.index.fielddata.fieldcomparator.ReplaceMissingTests.java
License:Apache License
public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwc);

    Document doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("cat")));
    iw.addDocument(doc);

    doc = new Document();
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("dog")));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader ar = getOnlySegmentReader(reader);
    SortedDocValues raw = ar.getSortedDocValues("field");
    assertEquals(2, raw.getValueCount());

    // existing values
    SortedDocValues dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("cat"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("dog"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    // non-existing values
    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("apple"));
    assertEquals(3, dv.getValueCount());
    assertEquals("apple", dv.lookupOrd(0).utf8ToString());
    assertEquals("cat", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(1, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("company"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("company", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("ebay"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals("ebay", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(2, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    reader.close();
    dir.close();
}
From source file:org.elasticsearch.join.fetch.ParentJoinFieldSubFetchPhase.java
License:Apache License
private String getSortedDocValue(String field, LeafReader reader, int docId) {
    try {
        SortedDocValues docValues = reader.getSortedDocValues(field);
        if (docValues == null || docValues.advanceExact(docId) == false) {
            return null;
        }
        int ord = docValues.ordValue();
        BytesRef joinName = docValues.lookupOrd(ord);
        return joinName.utf8ToString();
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
}
From source file:org.elasticsearch.search.fetch.parent.ParentFieldSubFetchPhase.java
License:Apache License
public static String getParentId(ParentFieldMapper fieldMapper, LeafReader reader, int docId) {
    try {
        SortedDocValues docValues = reader.getSortedDocValues(fieldMapper.name());
        BytesRef parentId = docValues.get(docId);
        assert parentId.length > 0;
        return parentId.utf8ToString();
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/**
 * test filtering two sorted dv fields
 */
public void testSortedDocValues() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new SortedDocValuesField("fieldA", new BytesRef("testA")));
    doc.add(new SortedDocValuesField("fieldB", new BytesRef("testB")));
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    SortedDocValues values = segmentReader.getSortedDocValues("fieldA");
    assertNotNull(values);
    assertTrue(values.advanceExact(0));
    assertEquals(new BytesRef("testA"), values.binaryValue());
    assertNull(segmentReader.getSortedDocValues("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}
From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java
License:Apache License
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {
    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final TIntIntHashMap ordCounts = new TIntIntHashMap();

    if (filter != null) {
        filter = filter.toLowerCase();
    }

    // Get count of segments.
    int sz = ir.leaves().size();

    for (int i = 0; i != sz; i++) {
        // Get the segment reader.
        LeafReader lr = ir.leaves().get(i).reader();

        // Doc count for field. Eg "album_genres"
        lr.getDocCount(fld.getName());

        // Get all documents that have the field "album_genres"
        Bits docs = lr.getDocsWithField(fld.getName());

        ordCounts.clear();

        // Enumerate the field terms.
        if (fld.isDocValues()) {
            if (fld.isMultiValue()) {
                // docvalues & multivalue is a SortedSetDocValues.
                // Per-document values in a SortedDocValues are deduplicated,
                // dereferenced, and sorted into a dictionary of unique values.
                // A pointer to the dictionary value (ordinal) can be retrieved
                // for each document. Ordinals are dense and in increasing sorted order.
                SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());

                if (set != null) {
                    // For all documents that have the field "album_genres":
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Enumerate the set of [terms] of "album_genres" for the
                            // document represented by docId.
                            // Each ord represents the term value.
                            set.setDocument(docId);

                            // For each term bump up the frequency.
                            long ord;
                            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                                ordCounts.adjustOrPutValue((int) ord, 1, 1);
                                System.out.println("term=" + set.lookupOrd(ord).utf8ToString());
                            }
                        }
                    }

                    TermsEnum te = set.termsEnum();
                    BytesRef term;
                    while ((term = te.next()) != null) {
                        int ord = (int) te.ord();
                        add(fld, valuesMap, filter, term, ordCounts.get(ord));
                    }
                }
            } else {
                SortedDocValues set = lr.getSortedDocValues(fld.getName());

                if (set != null) {
                    // For all documents that have the field "album_genres":
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Get the term - Classical, Rock, etc.
                            BytesRef term = set.get(docId);
                            add(fld, valuesMap, filter, term, 1);
                        }
                    }
                }
            }
        } else {
            // Normal field, not a doc value.
            Terms terms = lr.terms(fld.getName());
            TermsEnum te = terms.iterator();
            BytesRef term;
            while ((term = te.next()) != null) {
                add(fld, valuesMap, filter, term, te.docFreq());
            }
        }

        /*
         * SORTED:
         *   doc[0] = "aardvark"  doc[1] = "beaver"  doc[2] = "aardvark"
         *
         *   doc[0] = 0  doc[1] = 1  doc[2] = 0
         *
         *   term[0] = "aardvark"  term[1] = "beaver"
         */
        // http://127.0.0.1:8080/api/facets?fields=track_title_a
        // the above should return B:(4) because titles starting with B are 4!
    }

    // Get the array of term counters.
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);

    // Sort by term.
    Arrays.sort(list);

    // add to the map.
    this.fieldCounts.put(fld.getName(), list);

    return this;
}