Example usage for org.apache.lucene.index LeafReader getSortedDocValues

List of usage examples for org.apache.lucene.index LeafReader getSortedDocValues

Introduction

On this page you can find example usages of org.apache.lucene.index.LeafReader.getSortedDocValues.

Prototype

public abstract SortedDocValues getSortedDocValues(String field) throws IOException;

Source Link

Document

Returns SortedDocValues for this field, or null if no SortedDocValues were indexed for this field.

Usage

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

/**
 * Indexes random strings twice per document -- once as an indexed {@code StringField} and once as
 * a {@code SortedDocValuesField} -- deletes a random subset of documents, and then checks for
 * every leaf that the values obtained through {@code FieldCache} from the indexed field equal the
 * doc values read directly from the leaf reader.
 *
 * @param minLength lower bound handed to the random length generator for each value
 * @param maxLength upper bound handed to the random length generator; when it equals
 *                  {@code minLength} every value uses that fixed length
 * @throws Exception if indexing, reading or the comparison fails
 */
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    // A single document instance is reused; its three fields are re-populated before each add.
    Field id = new StringField("id", "", Field.Store.NO);
    Field indexed = new StringField("indexed", "", Field.Store.NO);
    Field docValue = new SortedDocValuesField("dv", new BytesRef());
    Document document = new Document();
    document.add(id);
    document.add(indexed);
    document.add(docValue);

    // Index the documents, committing at random points.
    int docCount = atLeast(300);
    for (int docNum = 0; docNum < docCount; docNum++) {
        id.setStringValue(Integer.toString(docNum));
        final int valueLength = (minLength == maxLength)
                ? minLength
                : TestUtil.nextInt(random(), minLength, maxLength);
        String text = TestUtil.randomSimpleString(random(), valueLength);
        indexed.setStringValue(text);
        docValue.setBytesValue(new BytesRef(text));
        indexWriter.addDocument(document);
        boolean commitNow = random().nextInt(31) == 0;
        if (commitNow) {
            indexWriter.commit();
        }
    }

    // Delete a random selection of documents by id (the same id may be drawn more than once).
    int deletions = random().nextInt(docCount / 10);
    for (int n = 0; n < deletions; n++) {
        String victim = Integer.toString(random().nextInt(docCount));
        indexWriter.deleteDocuments(new Term("id", victim));
    }
    indexWriter.close();

    // Compare the two views leaf by leaf.
    DirectoryReader indexReader = DirectoryReader.open(directory);
    for (LeafReaderContext leaf : indexReader.leaves()) {
        LeafReader leafReader = leaf.reader();
        SortedDocValues fromFieldCache = FieldCache.DEFAULT.getTermsIndex(leafReader, "indexed");
        SortedDocValues fromDocValues = leafReader.getSortedDocValues("dv");
        assertEquals(leafReader.maxDoc(), fromFieldCache, fromDocValues);
    }
    indexReader.close();
    directory.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheWithThreads.java

License:Apache License

/**
 * Indexes random strings as sorted doc values (optionally rejecting duplicate strings),
 * force-merges down to one segment, and then lets several threads concurrently fetch
 * {@code SortedDocValues} for random documents, comparing each value with the one recorded at
 * indexing time.
 */
public void test2() throws Exception {
    Random rnd = random();
    final int targetDocCount = atLeast(100);
    final Directory directory = newDirectory();
    final RandomIndexWriter indexWriter = new RandomIndexWriter(rnd, directory);
    final boolean duplicatesAllowed = rnd.nextBoolean();
    final Set<String> uniqueStrings = new HashSet<>();
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + targetDocCount + " allowDups=" + duplicatesAllowed);
    }
    // Value indexed for each "id"; position in the list equals the id.
    final List<BytesRef> expectedValues = new ArrayList<>();
    int indexed = 0;

    // TODO: deletions
    while (indexed < targetDocCount) {
        final String text = rnd.nextBoolean()
                ? TestUtil.randomSimpleString(rnd)
                : TestUtil.randomUnicodeString(rnd);

        // Set.add returns false for a string that was already seen; draw again in that case.
        if (!duplicatesAllowed && !uniqueStrings.add(text)) {
            continue;
        }

        if (VERBOSE) {
            System.out.println("  " + indexed + ": s=" + text);
        }

        final BytesRef bytes = new BytesRef(text);
        final Document document = new Document();
        document.add(new SortedDocValuesField("stringdv", bytes));
        document.add(new NumericDocValuesField("id", indexed));
        expectedValues.add(bytes);
        indexWriter.addDocument(document);
        indexed++;

        if (rnd.nextInt(40) == 17) {
            // force flush
            indexWriter.getReader().close();
        }
    }

    indexWriter.forceMerge(1);
    final DirectoryReader reader = indexWriter.getReader();
    indexWriter.close();

    final LeafReader leaf = getOnlyLeafReader(reader);

    // Worker threads keep checking until this deadline (30s for nightly runs, 1s otherwise).
    final long deadlineNanos = System.nanoTime() + TimeUnit.SECONDS.toNanos(TEST_NIGHTLY ? 30 : 1);

    final int threadCount = TestUtil.nextInt(random(), 1, 10);
    Thread[] workers = new Thread[threadCount];
    for (int t = 0; t < workers.length; t++) {
        workers[t] = new Thread() {
            @Override
            public void run() {
                Random threadRandom = random();
                final NumericDocValues idValues;
                try {
                    SortedDocValues direct = leaf.getSortedDocValues("stringdv");
                    idValues = leaf.getNumericDocValues("id");
                    assertNotNull(direct);
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }

                // Build the docID -> indexed "id" table by walking the numeric doc values once.
                final int maxDoc = leaf.maxDoc();
                int[] docToId = new int[maxDoc];
                try {
                    for (int doc = 0; doc < maxDoc; doc++) {
                        assertEquals(doc, idValues.nextDoc());
                        docToId[doc] = (int) idValues.longValue();
                    }
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }

                while (System.nanoTime() < deadlineNanos) {
                    for (int iter = 0; iter < 100; iter++) {
                        final int target = threadRandom.nextInt(maxDoc);
                        try {
                            // A fresh iterator is pulled for every lookup.
                            SortedDocValues perLookup = leaf.getSortedDocValues("stringdv");
                            assertEquals(target, perLookup.advance(target));
                            assertEquals(expectedValues.get(docToId[target]), perLookup.binaryValue());
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            }
        };
        workers[t].start();
    }

    for (Thread worker : workers) {
        worker.join();
    }

    reader.close();
    directory.close();
}

From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns a composite {@code SortedDocValues} view over all leaves of the wrapped reader.
 *
 * <p>When no ordinal map is cached for the field, the values come straight from
 * {@code MultiDocValues.getSortedValues}; if that result is a {@code MultiSortedDocValues} whose
 * mapping is owned by this reader's cache key, the mapping is stored in {@code cachedOrdMaps}.
 * When a cached map exists, the per-leaf values are collected and combined with that map.
 *
 * @param field name of the doc-values field
 * @return the merged values, or {@code null} if a leaf declares the field with a doc-values type
 *         other than {@code SORTED} (on the cached-map path)
 * @throws IOException if reading the underlying doc values fails
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    final OrdinalMap ordinalMap;
    synchronized (cachedOrdMaps) {
        OrdinalMap cached = cachedOrdMaps.get(field);
        if (cached == null) {
            // uncached, or not a multi dv
            SortedDocValues merged = MultiDocValues.getSortedValues(in, field);
            if (merged instanceof MultiSortedDocValues) {
                OrdinalMap fresh = ((MultiSortedDocValues) merged).mapping;
                IndexReader.CacheHelper helper = getReaderCacheHelper();
                // Only remember the mapping when it belongs to this reader's cache key.
                if (helper != null && fresh.owner == helper.getKey()) {
                    cachedOrdMaps.put(field, fresh);
                }
            }
            return merged;
        }
        ordinalMap = cached;
    }

    // Cached map available: rebuild the multi view from the individual leaves.
    final int leafCount = in.leaves().size();
    final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
    final int[] docBases = new int[leafCount + 1];
    long cost = 0;
    for (int leaf = 0; leaf < leafCount; leaf++) {
        final LeafReaderContext ctx = in.leaves().get(leaf);
        final LeafReader leafReader = ctx.reader();
        final FieldInfo info = leafReader.getFieldInfos().fieldInfo(field);
        final boolean wrongType = info != null && info.getDocValuesType() != DocValuesType.SORTED;
        if (wrongType) {
            return null;
        }
        SortedDocValues leafValues = leafReader.getSortedDocValues(field);
        // Leaves without the field contribute an empty instance.
        perLeaf[leaf] = (leafValues != null) ? leafValues : DocValues.emptySorted();
        cost += perLeaf[leaf].cost();
        docBases[leaf] = ctx.docBase;
    }
    docBases[leafCount] = maxDoc();
    return new MultiSortedDocValues(perLeaf, docBases, ordinalMap, cost);
}

From source file:org.elasticsearch.index.fielddata.fieldcomparator.ReplaceMissingTests.java

License:Apache License

/**
 * Builds a single-segment index with three documents ("cat", no value, "dog") and verifies the
 * dictionary and per-document ordinals produced by {@code ReplaceMissing} for several
 * replacement values, both ones already in the dictionary and ones that are not.
 */
public void test() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(null);
    config.setMergePolicy(newLogMergePolicy());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    // doc 0 -> "cat"
    Document withCat = new Document();
    withCat.add(new SortedDocValuesField("field", new BytesRef("cat")));
    indexWriter.addDocument(withCat);

    // doc 1 -> no value for the field
    indexWriter.addDocument(new Document());

    // doc 2 -> "dog"
    Document withDog = new Document();
    withDog.add(new SortedDocValuesField("field", new BytesRef("dog")));
    indexWriter.addDocument(withDog);
    indexWriter.forceMerge(1);
    indexWriter.close();

    DirectoryReader reader = DirectoryReader.open(directory);
    LeafReader leaf = getOnlySegmentReader(reader);
    SortedDocValues raw = leaf.getSortedDocValues("field");
    assertEquals(2, raw.getValueCount());

    // existing values
    assertReplaceMissing(raw, "cat", new String[] { "cat", "dog" }, new int[] { 0, 0, 1 });
    assertReplaceMissing(raw, "dog", new String[] { "cat", "dog" }, new int[] { 0, 1, 1 });

    // non-existing values
    assertReplaceMissing(raw, "apple", new String[] { "apple", "cat", "dog" }, new int[] { 1, 0, 2 });
    assertReplaceMissing(raw, "company", new String[] { "cat", "company", "dog" }, new int[] { 0, 1, 2 });
    assertReplaceMissing(raw, "ebay", new String[] { "cat", "dog", "ebay" }, new int[] { 0, 2, 1 });

    reader.close();
    directory.close();
}

/**
 * Wraps {@code raw} in a {@code ReplaceMissing} using {@code missing} as the replacement and
 * asserts the value count, the term at each ordinal, and the ordinal of each document.
 */
private static void assertReplaceMissing(SortedDocValues raw, String missing, String[] expectedTerms,
        int[] expectedOrds) throws Exception {
    SortedDocValues dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef(missing));
    assertEquals(expectedTerms.length, dv.getValueCount());
    for (int ord = 0; ord < expectedTerms.length; ord++) {
        assertEquals(expectedTerms[ord], dv.lookupOrd(ord).utf8ToString());
    }
    for (int doc = 0; doc < expectedOrds.length; doc++) {
        assertEquals(expectedOrds[doc], dv.getOrd(doc));
    }
}

From source file:org.elasticsearch.join.fetch.ParentJoinFieldSubFetchPhase.java

License:Apache License

/**
 * Reads the sorted doc value of {@code field} for a single document.
 *
 * @param field  doc-values field to read
 * @param reader leaf reader to read from
 * @param docId  document id passed to {@code advanceExact}
 * @return the value decoded as UTF-8, or {@code null} when the reader has no sorted doc values
 *         for the field or the document has no value
 */
private String getSortedDocValue(String field, LeafReader reader, int docId) {
    try {
        SortedDocValues values = reader.getSortedDocValues(field);
        if (values == null) {
            return null;
        }
        if (!values.advanceExact(docId)) {
            return null;
        }
        BytesRef term = values.lookupOrd(values.ordValue());
        return term.utf8ToString();
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
}

From source file:org.elasticsearch.search.fetch.parent.ParentFieldSubFetchPhase.java

License:Apache License

/**
 * Looks up the parent id of a document from the sorted doc values of the parent field.
 *
 * @param fieldMapper mapper whose {@code name()} identifies the doc-values field to read
 * @param reader      leaf reader to read from
 * @param docId       document id within {@code reader}
 * @return the parent id decoded as UTF-8, or {@code null} when the reader has no sorted doc
 *         values for the field or the value looked up for the document is empty
 */
public static String getParentId(ParentFieldMapper fieldMapper, LeafReader reader, int docId) {
    try {
        SortedDocValues docValues = reader.getSortedDocValues(fieldMapper.name());
        if (docValues == null) {
            // getSortedDocValues returns null when no sorted doc values were indexed for the
            // field; dereferencing it would throw a NullPointerException.
            return null;
        }
        BytesRef parentId = docValues.get(docId);
        // An empty value is reported as "no parent" instead of failing an assertion
        // (or returning an empty id when assertions are disabled).
        return parentId.length > 0 ? parentId.utf8ToString() : null;
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Test filtering two sorted dv fields: after wrapping the reader with an automaton that accepts
 * only "fieldA", that field stays readable while "fieldB" yields no doc values.
 */
public void testSortedDocValues() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(null);
    IndexWriter indexWriter = new IndexWriter(directory, config);

    // one document carrying both fields
    Document twoFields = new Document();
    twoFields.add(new SortedDocValuesField("fieldA", new BytesRef("testA")));
    twoFields.add(new SortedDocValuesField("fieldB", new BytesRef("testB")));
    indexWriter.addDocument(twoFields);

    // open a reader restricted to fieldA
    CharacterRunAutomaton onlyFieldA = new CharacterRunAutomaton(Automata.makeString("fieldA"));
    DirectoryReader filtered = FieldSubsetReader.wrap(DirectoryReader.open(indexWriter), onlyFieldA);

    // fieldA is visible with its value, fieldB is hidden
    LeafReader firstLeaf = filtered.leaves().get(0).reader();
    SortedDocValues fieldAValues = firstLeaf.getSortedDocValues("fieldA");
    assertNotNull(fieldAValues);
    assertTrue(fieldAValues.advanceExact(0));
    assertEquals(new BytesRef("testA"), fieldAValues.binaryValue());
    assertNull(firstLeaf.getSortedDocValues("fieldB"));

    TestUtil.checkReader(filtered);
    IOUtils.close(filtered, indexWriter, directory);
}

From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java

License:Apache License

/**
 * Collects per-term counts for one field across every segment of the index reader and stores
 * the sorted result in {@code fieldCounts} under the field's name.
 *
 * <p>How terms are enumerated depends on the field definition:
 * multi-valued doc-values fields are read through {@code SortedSetDocValues}, single-valued
 * doc-values fields through {@code SortedDocValues}, and all other fields through the segment's
 * indexed {@code Terms}. Segments that do not contain the field are skipped.
 *
 * @param fieldName name used to look up the field definition in {@code models}
 * @param filter    optional filter that is lower-cased and handed to {@code add(...)} for every
 *                  term; may be {@code null}
 * @return this builder
 * @throws IOException if reading from the index fails
 */
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {

    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final String loweredFilter = (filter == null) ? null : filter.toLowerCase();

    // Visit every segment; counts from all segments accumulate in valuesMap.
    //
    final int segmentCount = ir.leaves().size();
    for (int i = 0; i != segmentCount; i++) {
        final LeafReader lr = ir.leaves().get(i).reader();

        if (!fld.isDocValues()) {
            countIndexedTerms(lr, fld, valuesMap, loweredFilter);
        } else if (fld.isMultiValue()) {
            countSortedSetValues(lr, fld, valuesMap, loweredFilter);
        } else {
            countSortedValues(lr, fld, valuesMap, loweredFilter);
        }

        // http://127.0.0.1:8080/api/facets?fields=track_title_a
        // the above should return B:(4) because titles starting with B are
        // 4!
    }

    // Get the array of term counters.
    //
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);

    // Sort using the natural ordering of IndexedFieldTermCount.
    //
    Arrays.sort(list);

    // add to the map.
    //
    this.fieldCounts.put(fld.getName(), list);

    return this;
}

/**
 * Counts terms of a multi-valued doc-values field in one segment: tallies how many documents
 * reference each ordinal, then reports every dictionary term with its tally.
 */
private void countSortedSetValues(LeafReader lr, IndexedField fld,
        Map<String, IndexedFieldTermCount> valuesMap, String filter) throws IOException {
    SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());
    if (set == null) {
        return;
    }

    // Documents of this segment that have a value for the field.
    //
    Bits docs = lr.getDocsWithField(fld.getName());

    // Ordinal -> number of documents referencing it; ordinals are per segment, so the
    // tally starts empty for each segment.
    //
    final TIntIntHashMap ordCounts = new TIntIntHashMap();
    for (int docId = 0; docId != docs.length(); docId++) {
        if (docs.get(docId)) {
            set.setDocument(docId);

            // For each term of the document bump up the frequency.
            //
            long ord;
            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                ordCounts.adjustOrPutValue((int) ord, 1, 1);
            }
        }
    }

    TermsEnum te = set.termsEnum();
    BytesRef term;
    while ((term = te.next()) != null) {
        int ord = (int) te.ord();
        add(fld, valuesMap, filter, term, ordCounts.get(ord));
    }
}

/**
 * Counts terms of a single-valued doc-values field in one segment by adding one occurrence
 * for each document that has a value.
 *
 * <p>Example of the SORTED layout: doc[0] = "aardvark", doc[1] = "beaver", doc[2] = "aardvark"
 * is stored as ordinals doc[0] = 0, doc[1] = 1, doc[2] = 0 with the dictionary
 * term[0] = "aardvark", term[1] = "beaver".
 */
private void countSortedValues(LeafReader lr, IndexedField fld,
        Map<String, IndexedFieldTermCount> valuesMap, String filter) throws IOException {
    SortedDocValues values = lr.getSortedDocValues(fld.getName());
    if (values == null) {
        return;
    }

    // Documents of this segment that have a value for the field.
    //
    Bits docs = lr.getDocsWithField(fld.getName());
    for (int docId = 0; docId != docs.length(); docId++) {
        if (docs.get(docId)) {
            // Get the term - Classical, Rock, etc.
            //
            BytesRef term = values.get(docId);
            add(fld, valuesMap, filter, term, 1);
        }
    }
}

/**
 * Counts terms of a normal (non doc-values) field in one segment using each term's document
 * frequency.
 */
private void countIndexedTerms(LeafReader lr, IndexedField fld,
        Map<String, IndexedFieldTermCount> valuesMap, String filter) throws IOException {
    Terms terms = lr.terms(fld.getName());
    if (terms == null) {
        // The segment has no indexed terms for this field; nothing to count.
        return;
    }

    TermsEnum te = terms.iterator();
    BytesRef term;
    while ((term = te.next()) != null) {
        add(fld, valuesMap, filter, term, te.docFreq());
    }
}