List of usage examples for org.apache.lucene.index IndexWriter waitForMerges
void waitForMerges() throws IOException
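Before the real-world examples, here is a minimal sketch of the typical pattern: add documents, request a forceMerge, then call waitForMerges() so any background merges finish before the writer is closed. This sketch assumes the Lucene 4.x API used in the examples below; the index path "target/demo-index" and the field name "body" are placeholders, not taken from any of the source files.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class WaitForMergesExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("target/demo-index"));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8,
                new StandardAnalyzer(Version.LUCENE_4_8));
        IndexWriter writer = new IndexWriter(dir, iwc);

        for (int i = 0; i < 1000; i++) {
            Document doc = new Document();
            doc.add(new TextField("body", "document " + i, Field.Store.NO));
            writer.addDocument(doc); // flushes may kick off background merges
        }

        writer.forceMerge(1);   // request a merge down to a single segment
        writer.waitForMerges(); // block until all running/pending merges complete
        writer.close();
        dir.close();
    }
}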
From source file:org.elasticsearch.benchmark.compress.LuceneCompressionBenchmark.java
License:Apache License
public static void main(String[] args) throws Exception {
    final long MAX_SIZE = ByteSizeValue.parseBytesSizeValue("50mb").bytes();
    final boolean WITH_TV = true;

    File testFile = new File("target/test/compress/lucene");
    FileSystemUtils.deleteRecursively(testFile);
    testFile.mkdirs();

    FSDirectory uncompressedDir = new NIOFSDirectory(new File(testFile, "uncompressed"));
    IndexWriter uncompressedWriter = new IndexWriter(uncompressedDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Directory compressedLzfDir = new CompressedDirectory(
            new NIOFSDirectory(new File(testFile, "compressed_lzf")), new LZFCompressor(), false, "fdt", "tvf");
    IndexWriter compressedLzfWriter = new IndexWriter(compressedLzfDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Directory compressedSnappyDir = new CompressedDirectory(
            new NIOFSDirectory(new File(testFile, "compressed_snappy")), new XerialSnappyCompressor(), false,
            "fdt", "tvf");
    IndexWriter compressedSnappyWriter = new IndexWriter(compressedSnappyDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    System.out.println("feeding data...");
    TestData testData = new TestData();
    while (testData.next() && testData.getTotalSize() < MAX_SIZE) {
        // json
        XContentBuilder builder = XContentFactory.jsonBuilder();
        testData.current(builder);
        builder.close();
        Document doc = new Document();
        doc.add(new Field("_source", builder.bytes().array(), builder.bytes().arrayOffset(),
                builder.bytes().length()));
        if (WITH_TV) {
            Field field = new Field("text", builder.string(), Field.Store.NO, Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS);
            doc.add(field);
        }
        uncompressedWriter.addDocument(doc);
        compressedLzfWriter.addDocument(doc);
        compressedSnappyWriter.addDocument(doc);
    }

    System.out.println("optimizing...");
    uncompressedWriter.forceMerge(1);
    compressedLzfWriter.forceMerge(1);
    compressedSnappyWriter.forceMerge(1);
    uncompressedWriter.waitForMerges();
    compressedLzfWriter.waitForMerges();
    compressedSnappyWriter.waitForMerges();
    System.out.println("done");

    uncompressedWriter.close();
    compressedLzfWriter.close();
    compressedSnappyWriter.close();

    compressedLzfDir.close();
    compressedSnappyDir.close();
    uncompressedDir.close();
}
From source file:org.elasticsearch.benchmark.compress.LuceneCompressionBenchmarkTests.java
License:Apache License
@Test
public void test() throws Exception {
    final long MAX_SIZE = ByteSizeValue.parseBytesSizeValue("50mb").bytes();
    final boolean WITH_TV = true;

    File testFile = new File("target/test/compress/lucene");
    FileSystemUtils.deleteRecursively(testFile);
    testFile.mkdirs();

    FSDirectory uncompressedDir = new NIOFSDirectory(new File(testFile, "uncompressed"));
    IndexWriter uncompressedWriter = new IndexWriter(uncompressedDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Compressor lzf = CompressorFactory.compressor("lzf");
    Directory compressedLzfDir = new CompressedDirectory(
            new NIOFSDirectory(new File(testFile, "compressed_lzf")), lzf, false, "fdt", "tvf");
    IndexWriter compressedLzfWriter = new IndexWriter(compressedLzfDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    //Directory compressedSnappyDir = new CompressedDirectory(new NIOFSDirectory(new File(testFile, "compressed_snappy")), new XerialSnappyCompressor(), false, "fdt", "tvf");
    //IndexWriter compressedSnappyWriter = new IndexWriter(compressedSnappyDir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    logger.info("feeding data...");
    TestData testData = new TestData();
    while (testData.next() && testData.getTotalSize() < MAX_SIZE) {
        // json
        XContentBuilder builder = XContentFactory.jsonBuilder();
        testData.current(builder);
        builder.close();
        Document doc = new Document();
        doc.add(new Field("_source", builder.bytes().array(), builder.bytes().arrayOffset(),
                builder.bytes().length()));
        if (WITH_TV) {
            Field field = new Field("text", builder.string(), Field.Store.NO, Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS);
            doc.add(field);
        }
        uncompressedWriter.addDocument(doc);
        compressedLzfWriter.addDocument(doc);
        //compressedSnappyWriter.addDocument(doc);
    }

    logger.info("optimizing...");
    uncompressedWriter.forceMerge(1);
    compressedLzfWriter.forceMerge(1);
    //compressedSnappyWriter.forceMerge(1);
    uncompressedWriter.waitForMerges();
    compressedLzfWriter.waitForMerges();
    //compressedSnappyWriter.waitForMerges();
    logger.info("done");

    uncompressedWriter.close();
    compressedLzfWriter.close();
    //compressedSnappyWriter.close();

    compressedLzfDir.close();
    //compressedSnappyDir.close();
    uncompressedDir.close();
}
From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java
License:Apache License
private void lucene(Compressor compressor) throws Exception {
    CompressedDirectory dir = new CompressedDirectory(new RAMDirectory(), compressor, false, "fdt");
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    writer.addDocument(createDoc(1, (int) SizeValue.parseSizeValue("100b").singles()));
    writer.addDocument(createDoc(2, (int) SizeValue.parseSizeValue("5k").singles()));
    writer.commit();

    writer.addDocument(createDoc(3, (int) SizeValue.parseSizeValue("2k").singles()));
    writer.addDocument(createDoc(4, (int) SizeValue.parseSizeValue("1k").singles()));
    writer.commit();

    verify(writer);
    writer.forceMerge(1);
    writer.waitForMerges();
    verify(writer);

    dir.setCompress(false);
    writer.addDocument(createDoc(5, (int) SizeValue.parseSizeValue("2k").singles()));
    writer.addDocument(createDoc(6, (int) SizeValue.parseSizeValue("1k").singles()));
    verify(writer);
    writer.forceMerge(1);
    writer.waitForMerges();
    verify(writer);
    writer.close();
}
From source file:perf.IDPerfTest.java
License:Apache License
private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
        final int maxTermsInBlock) throws IOException {
    System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
    Directory dir = FSDirectory.open(new File(indexPath));
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // So I can walk the files and get the *.tip sizes:
    iwc.setUseCompoundFile(false);

    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
        }
    });

    /// 7/7/7 segment structure:
    iwc.setMaxBufferedDocs(ID_COUNT / 777);
    iwc.setRAMBufferSizeMB(-1);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

    //iwc.setMergePolicy(new LogDocMergePolicy());
    ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
    ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
    iwc.getMergePolicy().setNoCFSRatio(0.0);

    IndexWriter w = new IndexWriter(dir, iwc);

    Document doc = new Document();
    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setTokenized(true);
    ft.freeze();
    BytesRef idValue = new BytesRef(64);
    Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
    doc.add(idField);

    long t0 = System.nanoTime();

    BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
    Random random = new Random(17);
    int lookupCount = 0;
    double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
    for (int i = 0; i < ID_COUNT; i++) {
        ids.next(idValue);
        if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
            lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
        }
        // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
        w.addDocument(doc);
    }

    if (lookupCount < lookupIDs.length) {
        throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
    }

    long indexTime = System.nanoTime() - t0;

    System.out.println(" indexing done; waitForMerges...");
    w.waitForMerges();

    IndexReader r = DirectoryReader.open(w, true);
    System.out.println(" reader=" + r);

    shuffle(random, lookupIDs);
    shuffle(random, lookupIDs);

    long bestTime = Long.MAX_VALUE;
    long checksum = 0;

    List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());

    // Sort largest to smallest:
    Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
        @Override
        public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
            return c2.reader().maxDoc() - c1.reader().maxDoc();
        }
    });

    TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
    DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
    int[] docBases = new int[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
        //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
        termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
        docBases[i] = leaves.get(i).docBase;
    }

    long rawLookupCount = 0;
    int countx = 0;
    for (int iter = 0; iter < 5; iter++) {
        t0 = System.nanoTime();
        BlockTreeTermsReader.seekExactFastNotFound = 0;
        BlockTreeTermsReader.seekExactFastRootNotFound = 0;
        rawLookupCount = 0;
        for (BytesRef id : lookupIDs) {
            if (countx++ < 50) {
                System.out.println(" id=" + id);
            }
            boolean found = false;
            for (int seg = 0; seg < termsEnums.length; seg++) {
                rawLookupCount++;
                if (termsEnums[seg].seekExact(id)) {
                    docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
                    int docID = docsEnums[seg].nextDoc();
                    if (docID == DocsEnum.NO_MORE_DOCS) {
                        // uh-oh!
                        throw new RuntimeException("id not found: " + id);
                    }
                    // paranoia:
                    checksum += docID + docBases[seg];

                    found = true;

                    // Optimization vs MultiFields: we don't need to check any more segments since id is PK
                    break;
                }
            }

            if (found == false) {
                // uh-oh!
                throw new RuntimeException("id not found: " + id);
            }
        }
        long lookupTime = System.nanoTime() - t0;
        System.out.println(String.format(Locale.ROOT, " iter=" + iter + " lookupTime=%.3f sec",
                lookupTime / 1000000000.0));
        if (lookupTime < bestTime) {
            bestTime = lookupTime;
            System.out.println(" **");
        }
    }

    long totalBytes = 0;
    long termsIndexTotalBytes = 0;
    for (String fileName : dir.listAll()) {
        long bytes = dir.fileLength(fileName);
        totalBytes += bytes;
        if (fileName.endsWith(".tip")) {
            termsIndexTotalBytes += bytes;
        }
    }

    r.close();
    w.rollback();
    dir.close();

    return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
            totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
            BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}
From source file:perf.PKLookupPerfTest3X.java
License:Apache License
private static void createIndex(final Directory dir, final int docCount) throws IOException {
    System.out.println("Create index... " + docCount + " docs");
    final IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
            new WhitespaceAnalyzer(Version.LUCENE_35));
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // 5 segs per level in 3 levels:
    int mbd = docCount / (5 * 111);
    iwc.setMaxBufferedDocs(mbd);
    iwc.setRAMBufferSizeMB(-1.0);
    ((TieredMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false);

    final IndexWriter w = new IndexWriter(dir, iwc);
    //w.setInfoStream(System.out);

    final Document doc = new Document();
    final Field field = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
    field.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
    doc.add(field);

    for (int i = 0; i < docCount; i++) {
        field.setValue(String.format("%09d", i));
        w.addDocument(doc);
        if ((i + 1) % 1000000 == 0) {
            System.out.println((i + 1) + "...");
        }
    }

    w.waitForMerges();
    w.close();
}