Example usage for org.apache.lucene.index RandomIndexWriter forceMerge

List of usage examples for org.apache.lucene.index RandomIndexWriter forceMerge

Introduction

In this page you can find the example usage for org.apache.lucene.index RandomIndexWriter forceMerge.

Prototype

public void forceMerge(int maxSegmentCount) throws IOException 

Source Link

Document

Forces a forceMerge.

Usage

From source file:com.sindicetech.siren.util.SirenTestCase.java

License:Open Source License

protected void forceMerge(final RandomIndexWriter writer) throws IOException {
    writer.forceMerge(1);
}

From source file:org.apache.solr.search.function.TestOrdValues.java

License:Apache License

protected static void createIndex(boolean doMultiSegment) throws Exception {
    if (VERBOSE) {
        System.out.println("TEST: setUp");
    }/*from www.  j a v a  2  s . c o m*/
    // prepare a small index with just a few documents.
    dir = newDirectory();
    anlzr = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy());
    if (doMultiSegment) {
        iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7));
    }
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
    int remaining = N_DOCS;
    boolean done[] = new boolean[N_DOCS];
    int i = 0;
    while (remaining > 0) {
        if (done[i]) {
            throw new Exception(
                    "to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!");
        }
        addDoc(iw, i);
        done[i] = true;
        i = (i + 4) % N_DOCS;
        remaining--;
    }
    if (!doMultiSegment) {
        if (VERBOSE) {
            System.out.println("TEST: setUp full merge");
        }
        iw.forceMerge(1);
    }
    iw.close();
    if (VERBOSE) {
        System.out.println("TEST: setUp done close");
    }
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);/* ww w. j  a v  a  2 s. c  o  m*/

    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);

    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();

    LeafReader ar = getOnlyLeafReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next()
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil()
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world")));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // lookupTerm(BytesRef) 
    assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
    assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
    assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
    assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
    assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
    assertEquals(2, dv.lookupTerm(new BytesRef("world")));
    assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));

    ireader.close();
    directory.close();
}

From source file:org.apache.solr.uninverting.TestFieldCache.java

License:Apache License

public void testLongFieldCache() throws IOException {
    Directory dir = newDirectory();//from   w  w w  .  j av  a2 s.  co m
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    LongPoint field = new LongPoint("f", 0L);
    StoredField field2 = new StoredField("f", 0L);
    doc.add(field);
    doc.add(field2);
    final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
    Set<Integer> missing = new HashSet<>();
    for (int i = 0; i < values.length; ++i) {
        final long v;
        switch (random().nextInt(10)) {
        case 0:
            v = Long.MIN_VALUE;
            break;
        case 1:
            v = 0;
            break;
        case 2:
            v = Long.MAX_VALUE;
            break;
        default:
            v = TestUtil.nextLong(random(), -10, 10);
            break;
        }
        values[i] = v;
        if (v == 0 && random().nextBoolean()) {
            // missing
            iw.addDocument(new Document());
            missing.add(i);
        } else {
            field.setLongValue(v);
            field2.setLongValue(v);
            iw.addDocument(doc);
        }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f",
            FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < values.length; ++i) {
        if (missing.contains(i) == false) {
            assertEquals(i, longs.nextDoc());
            assertEquals(values[i], longs.longValue());
        }
    }
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    reader.close();
    iw.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCache.java

License:Apache License

public void testIntFieldCache() throws IOException {
    Directory dir = newDirectory();/*from   w  w  w  . j a va  2s  .c o m*/
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    IntPoint field = new IntPoint("f", 0);
    doc.add(field);
    final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
    Set<Integer> missing = new HashSet<>();
    for (int i = 0; i < values.length; ++i) {
        final int v;
        switch (random().nextInt(10)) {
        case 0:
            v = Integer.MIN_VALUE;
            break;
        case 1:
            v = 0;
            break;
        case 2:
            v = Integer.MAX_VALUE;
            break;
        default:
            v = TestUtil.nextInt(random(), -10, 10);
            break;
        }
        values[i] = v;
        if (v == 0 && random().nextBoolean()) {
            // missing
            iw.addDocument(new Document());
            missing.add(i);
        } else {
            field.setIntValue(v);
            iw.addDocument(doc);
        }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f",
            FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < values.length; ++i) {
        if (missing.contains(i) == false) {
            assertEquals(i, ints.nextDoc());
            assertEquals(values[i], ints.longValue());
        }
    }
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    reader.close();
    iw.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();/*from www .j  a v a  2  s .  co m*/
    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
        doc.add(idField);
        final int length = TestUtil.nextInt(random(), minLength, maxLength);
        int numValues = random().nextInt(17);
        // create a random list of strings
        List<String> values = new ArrayList<>();
        for (int v = 0; v < numValues; v++) {
            values.add(TestUtil.randomSimpleString(random(), minLength, length));
        }

        // add in any order to the indexed field
        ArrayList<String> unordered = new ArrayList<>(values);
        Collections.shuffle(unordered, random());
        for (String v : values) {
            doc.add(newStringField("indexed", v, Field.Store.NO));
        }

        // add in any order to the dv field
        ArrayList<String> unordered2 = new ArrayList<>(values);
        Collections.shuffle(unordered2, random());
        for (String v : unordered2) {
            doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }

        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare per-segment
    DirectoryReader ir = writer.getReader();
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
        SortedSetDocValues actual = r.getSortedSetDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();

    writer.forceMerge(1);

    // now compare again after the merge
    ir = writer.getReader();
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
    SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
    assertEquals(ir.maxDoc(), expected, actual);
    ir.close();

    writer.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
    Directory dir = newDirectory();/*  w  ww  . j a  va  2 s .  c  o m*/
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    Field idField = new StringField("id", "", Field.Store.NO);
    Field indexedField = newStringField("indexed", "", Field.Store.NO);
    Field dvField = new NumericDocValuesField("dv", 0);

    // index some docs
    int numDocs = atLeast(300);
    // numDocs should be always > 256 so that in case of a codec that optimizes
    // for numbers of values <= 256, all storage layouts are tested
    assert numDocs > 256;
    for (int i = 0; i < numDocs; i++) {
        idField.setStringValue(Integer.toString(i));
        long value = longs.next();
        indexedField.setStringValue(Long.toString(value));
        dvField.setLongValue(value);
        Document doc = new Document();
        doc.add(idField);
        // 1/4 of the time we neglect to add the fields
        if (random().nextInt(4) > 0) {
            doc.add(indexedField);
            doc.add(dvField);
        }
        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // merge some segments and ensure that at least one of them has more than
    // 256 values
    writer.forceMerge(numDocs / 256);

    writer.close();

    // compare
    DirectoryReader ir = DirectoryReader.open(dir);
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
        Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
        assertEquals(expected, actual);
    }
    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheWithThreads.java

License:Apache License

public void test2() throws Exception {
    Random random = random();/*from w w  w . j  a  v a 2 s  . c  o  m*/
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();

    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final String s;
        if (random.nextBoolean()) {
            s = TestUtil.randomSimpleString(random);
        } else {
            s = TestUtil.randomUnicodeString(random);
        }
        final BytesRef br = new BytesRef(s);

        if (!allowDups) {
            if (seen.contains(s)) {
                continue;
            }
            seen.add(s);
        }

        if (VERBOSE) {
            System.out.println("  " + numDocs + ": s=" + s);
        }

        final Document doc = new Document();
        doc.add(new SortedDocValuesField("stringdv", br));
        doc.add(new NumericDocValuesField("id", numDocs));
        docValues.add(br);
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    writer.forceMerge(1);
    final DirectoryReader r = writer.getReader();
    writer.close();

    final LeafReader sr = getOnlyLeafReader(r);

    final long END_TIME = System.nanoTime()
            + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);

    final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
    Thread[] threads = new Thread[NUM_THREADS];
    for (int thread = 0; thread < NUM_THREADS; thread++) {
        threads[thread] = new Thread() {
            @Override
            public void run() {
                Random random = random();
                final SortedDocValues stringDVDirect;
                final NumericDocValues docIDToID;
                try {
                    stringDVDirect = sr.getSortedDocValues("stringdv");
                    docIDToID = sr.getNumericDocValues("id");
                    assertNotNull(stringDVDirect);
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }
                int[] docIDToIDArray = new int[sr.maxDoc()];
                for (int i = 0; i < sr.maxDoc(); i++) {
                    try {
                        assertEquals(i, docIDToID.nextDoc());
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                    try {
                        docIDToIDArray[i] = (int) docIDToID.longValue();
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
                while (System.nanoTime() < END_TIME) {
                    for (int iter = 0; iter < 100; iter++) {
                        final int docID = random.nextInt(sr.maxDoc());
                        try {
                            SortedDocValues dvs = sr.getSortedDocValues("stringdv");
                            assertEquals(docID, dvs.advance(docID));
                            assertEquals(docValues.get(docIDToIDArray[docID]), dvs.binaryValue());
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            }
        };
        threads[thread].start();
    }

    for (Thread thread : threads) {
        thread.join();
    }

    r.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestLegacyFieldCache.java

License:Apache License

public void testLongFieldCache() throws IOException {
    Directory dir = newDirectory();//from  www.  j  a va2 s . co  m
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
    doc.add(field);
    final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
    Set<Integer> missing = new HashSet<>();
    for (int i = 0; i < values.length; ++i) {
        final long v;
        switch (random().nextInt(10)) {
        case 0:
            v = Long.MIN_VALUE;
            break;
        case 1:
            v = 0;
            break;
        case 2:
            v = Long.MAX_VALUE;
            break;
        default:
            v = TestUtil.nextLong(random(), -10, 10);
            break;
        }
        values[i] = v;
        if (v == 0 && random().nextBoolean()) {
            // missing
            iw.addDocument(new Document());
            missing.add(i);
        } else {
            field.setLongValue(v);
            iw.addDocument(doc);
        }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f",
            FieldCache.LEGACY_LONG_PARSER);
    for (int i = 0; i < values.length; ++i) {
        if (missing.contains(i) == false) {
            assertEquals(i, longs.nextDoc());
            assertEquals(values[i], longs.longValue());
        }
    }
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    reader.close();
    iw.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestLegacyFieldCache.java

License:Apache License

public void testIntFieldCache() throws IOException {
    Directory dir = newDirectory();/*from w w w. jav  a  2  s.  c  om*/
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
    doc.add(field);
    final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
    Set<Integer> missing = new HashSet<>();
    for (int i = 0; i < values.length; ++i) {
        final int v;
        switch (random().nextInt(10)) {
        case 0:
            v = Integer.MIN_VALUE;
            break;
        case 1:
            v = 0;
            break;
        case 2:
            v = Integer.MAX_VALUE;
            break;
        default:
            v = TestUtil.nextInt(random(), -10, 10);
            break;
        }
        values[i] = v;
        if (v == 0 && random().nextBoolean()) {
            // missing
            iw.addDocument(new Document());
            missing.add(i);
        } else {
            field.setIntValue(v);
            iw.addDocument(doc);
        }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f",
            FieldCache.LEGACY_INT_PARSER);
    for (int i = 0; i < values.length; ++i) {
        if (missing.contains(i) == false) {
            assertEquals(i, ints.nextDoc());
            assertEquals(values[i], ints.longValue());
        }
    }
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    reader.close();
    iw.close();
    dir.close();
}