List of usage examples for org.apache.lucene.index IndexWriter waitForMerges
void waitForMerges() throws IOException
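Before the real-world examples, here is a minimal sketch of the typical pattern: add documents, request a forceMerge, then call waitForMerges() so any background merges finish before the writer is closed. This sketch assumes the Lucene 4.x API used in the examples below; the index path "target/demo-index" and the field name "body" are placeholders, not taken from any of the source files.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class WaitForMergesExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("target/demo-index"));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8,
                new StandardAnalyzer(Version.LUCENE_4_8));
        IndexWriter writer = new IndexWriter(dir, iwc);

        for (int i = 0; i < 1000; i++) {
            Document doc = new Document();
            doc.add(new TextField("body", "document " + i, Field.Store.NO));
            writer.addDocument(doc); // flushes may kick off background merges
        }

        writer.forceMerge(1);   // request a merge down to a single segment
        writer.waitForMerges(); // block until all running/pending merges complete
        writer.close();
        dir.close();
    }
}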
From source file:org.elasticsearch.benchmark.compress.LuceneCompressionBenchmark.java
License:Apache License
public static void main(String[] args) throws Exception {
    final long MAX_SIZE = ByteSizeValue.parseBytesSizeValue("50mb").bytes();
    final boolean WITH_TV = true;

    File testFile = new File("target/test/compress/lucene");
    FileSystemUtils.deleteRecursively(testFile);
    testFile.mkdirs();

    FSDirectory uncompressedDir = new NIOFSDirectory(new File(testFile, "uncompressed"));
    IndexWriter uncompressedWriter = new IndexWriter(uncompressedDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Directory compressedLzfDir = new CompressedDirectory(
            new NIOFSDirectory(new File(testFile, "compressed_lzf")), new LZFCompressor(), false, "fdt", "tvf");
    IndexWriter compressedLzfWriter = new IndexWriter(compressedLzfDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Directory compressedSnappyDir = new CompressedDirectory(
            new NIOFSDirectory(new File(testFile, "compressed_snappy")), new XerialSnappyCompressor(), false,
            "fdt", "tvf");
    IndexWriter compressedSnappyWriter = new IndexWriter(compressedSnappyDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    System.out.println("feeding data...");
    TestData testData = new TestData();
    while (testData.next() && testData.getTotalSize() < MAX_SIZE) {
        // json
        XContentBuilder builder = XContentFactory.jsonBuilder();
        testData.current(builder);
        builder.close();
        Document doc = new Document();
        doc.add(new Field("_source", builder.bytes().array(), builder.bytes().arrayOffset(),
                builder.bytes().length()));
        if (WITH_TV) {
            Field field = new Field("text", builder.string(), Field.Store.NO, Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS);
            doc.add(field);
        }
        uncompressedWriter.addDocument(doc);
        compressedLzfWriter.addDocument(doc);
        compressedSnappyWriter.addDocument(doc);
    }

    System.out.println("optimizing...");
    uncompressedWriter.forceMerge(1);
    compressedLzfWriter.forceMerge(1);
    compressedSnappyWriter.forceMerge(1);
    uncompressedWriter.waitForMerges();
    compressedLzfWriter.waitForMerges();
    compressedSnappyWriter.waitForMerges();
    System.out.println("done");

    uncompressedWriter.close();
    compressedLzfWriter.close();
    compressedSnappyWriter.close();

    compressedLzfDir.close();
    compressedSnappyDir.close();
    uncompressedDir.close();
}
From source file:org.elasticsearch.benchmark.compress.LuceneCompressionBenchmarkTests.java
License:Apache License
@Test
public void test() throws Exception {
    final long MAX_SIZE = ByteSizeValue.parseBytesSizeValue("50mb").bytes();
    final boolean WITH_TV = true;

    File testFile = new File("target/test/compress/lucene");
    FileSystemUtils.deleteRecursively(testFile);
    testFile.mkdirs();

    FSDirectory uncompressedDir = new NIOFSDirectory(new File(testFile, "uncompressed"));
    IndexWriter uncompressedWriter = new IndexWriter(uncompressedDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    Compressor lzf = CompressorFactory.compressor("lzf");
    Directory compressedLzfDir = new CompressedDirectory(
            new NIOFSDirectory(new File(testFile, "compressed_lzf")), lzf, false, "fdt", "tvf");
    IndexWriter compressedLzfWriter = new IndexWriter(compressedLzfDir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    //Directory compressedSnappyDir = new CompressedDirectory(new NIOFSDirectory(new File(testFile, "compressed_snappy")), new XerialSnappyCompressor(), false, "fdt", "tvf");
    //IndexWriter compressedSnappyWriter = new IndexWriter(compressedSnappyDir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    logger.info("feeding data...");
    TestData testData = new TestData();
    while (testData.next() && testData.getTotalSize() < MAX_SIZE) {
        // json
        XContentBuilder builder = XContentFactory.jsonBuilder();
        testData.current(builder);
        builder.close();
        Document doc = new Document();
        doc.add(new Field("_source", builder.bytes().array(), builder.bytes().arrayOffset(),
                builder.bytes().length()));
        if (WITH_TV) {
            Field field = new Field("text", builder.string(), Field.Store.NO, Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS);
            doc.add(field);
        }
        uncompressedWriter.addDocument(doc);
        compressedLzfWriter.addDocument(doc);
        //compressedSnappyWriter.addDocument(doc);
    }

    logger.info("optimizing...");
    uncompressedWriter.forceMerge(1);
    compressedLzfWriter.forceMerge(1);
    //compressedSnappyWriter.forceMerge(1);
    uncompressedWriter.waitForMerges();
    compressedLzfWriter.waitForMerges();
    //compressedSnappyWriter.waitForMerges();
    logger.info("done");

    uncompressedWriter.close();
    compressedLzfWriter.close();
    //compressedSnappyWriter.close();

    compressedLzfDir.close();
    //compressedSnappyDir.close();
    uncompressedDir.close();
}
From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java
License:Apache License
private void lucene(Compressor compressor) throws Exception {
    CompressedDirectory dir = new CompressedDirectory(new RAMDirectory(), compressor, false, "fdt");
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    writer.addDocument(createDoc(1, (int) SizeValue.parseSizeValue("100b").singles()));
    writer.addDocument(createDoc(2, (int) SizeValue.parseSizeValue("5k").singles()));
    writer.commit();

    writer.addDocument(createDoc(3, (int) SizeValue.parseSizeValue("2k").singles()));
    writer.addDocument(createDoc(4, (int) SizeValue.parseSizeValue("1k").singles()));
    writer.commit();

    verify(writer);
    writer.forceMerge(1);
    writer.waitForMerges();
    verify(writer);

    dir.setCompress(false);
    writer.addDocument(createDoc(5, (int) SizeValue.parseSizeValue("2k").singles()));
    writer.addDocument(createDoc(6, (int) SizeValue.parseSizeValue("1k").singles()));
    verify(writer);
    writer.forceMerge(1);
    writer.waitForMerges();
    verify(writer);
    writer.close();
}
From source file:perf.IDPerfTest.java
License:Apache License
private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
        final int maxTermsInBlock) throws IOException {
    System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
    Directory dir = FSDirectory.open(new File(indexPath));
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // So I can walk the files and get the *.tip sizes:
    iwc.setUseCompoundFile(false);

    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
        }
    });

    /// 7/7/7 segment structure:
    iwc.setMaxBufferedDocs(ID_COUNT / 777);
    iwc.setRAMBufferSizeMB(-1);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

    //iwc.setMergePolicy(new LogDocMergePolicy());
    ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
    ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
    iwc.getMergePolicy().setNoCFSRatio(0.0);

    IndexWriter w = new IndexWriter(dir, iwc);

    Document doc = new Document();
    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setTokenized(true);
    ft.freeze();
    BytesRef idValue = new BytesRef(64);
    Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
    doc.add(idField);

    long t0 = System.nanoTime();

    BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
    Random random = new Random(17);
    int lookupCount = 0;
    double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
    for (int i = 0; i < ID_COUNT; i++) {
        ids.next(idValue);
        if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
            lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
        }
        // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
        w.addDocument(doc);
    }

    if (lookupCount < lookupIDs.length) {
        throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
    }

    long indexTime = System.nanoTime() - t0;

    System.out.println(" indexing done; waitForMerges...");
    w.waitForMerges();

    IndexReader r = DirectoryReader.open(w, true);
    System.out.println(" reader=" + r);

    shuffle(random, lookupIDs);
    shuffle(random, lookupIDs);

    long bestTime = Long.MAX_VALUE;
    long checksum = 0;

    List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());

    // Sort largest to smallest:
    Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
        @Override
        public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
            return c2.reader().maxDoc() - c1.reader().maxDoc();
        }
    });

    TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
    DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
    int[] docBases = new int[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
        //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
        termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
        docBases[i] = leaves.get(i).docBase;
    }

    long rawLookupCount = 0;
    int countx = 0;
    for (int iter = 0; iter < 5; iter++) {
        t0 = System.nanoTime();
        BlockTreeTermsReader.seekExactFastNotFound = 0;
        BlockTreeTermsReader.seekExactFastRootNotFound = 0;
        rawLookupCount = 0;
        for (BytesRef id : lookupIDs) {
            if (countx++ < 50) {
                System.out.println(" id=" + id);
            }
            boolean found = false;
            for (int seg = 0; seg < termsEnums.length; seg++) {
                rawLookupCount++;
                if (termsEnums[seg].seekExact(id)) {
                    docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
                    int docID = docsEnums[seg].nextDoc();
                    if (docID == DocsEnum.NO_MORE_DOCS) {
                        // uh-oh!
                        throw new RuntimeException("id not found: " + id);
                    }
                    // paranoia:
                    checksum += docID + docBases[seg];

                    found = true;

                    // Optimization vs MultiFields: we don't need to check any more segments since id is PK
                    break;
                }
            }

            if (found == false) {
                // uh-oh!
                throw new RuntimeException("id not found: " + id);
            }
        }
        long lookupTime = System.nanoTime() - t0;
        System.out.println(String.format(Locale.ROOT, " iter=" + iter + " lookupTime=%.3f sec",
                lookupTime / 1000000000.0));
        if (lookupTime < bestTime) {
            bestTime = lookupTime;
            System.out.println(" **");
        }
    }

    long totalBytes = 0;
    long termsIndexTotalBytes = 0;
    for (String fileName : dir.listAll()) {
        long bytes = dir.fileLength(fileName);
        totalBytes += bytes;
        if (fileName.endsWith(".tip")) {
            termsIndexTotalBytes += bytes;
        }
    }

    r.close();
    w.rollback();
    dir.close();

    return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
            totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
            BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}
From source file:perf.PKLookupPerfTest3X.java
License:Apache License
private static void createIndex(final Directory dir, final int docCount) throws IOException {
    System.out.println("Create index... " + docCount + " docs");
    final IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
            new WhitespaceAnalyzer(Version.LUCENE_35));
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // 5 segs per level in 3 levels:
    int mbd = docCount / (5 * 111);
    iwc.setMaxBufferedDocs(mbd);
    iwc.setRAMBufferSizeMB(-1.0);
    ((TieredMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false);

    final IndexWriter w = new IndexWriter(dir, iwc);
    //w.setInfoStream(System.out);

    final Document doc = new Document();
    final Field field = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
    field.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
    doc.add(field);

    for (int i = 0; i < docCount; i++) {
        field.setValue(String.format("%09d", i));
        w.addDocument(doc);
        if ((i + 1) % 1000000 == 0) {
            System.out.println((i + 1) + "...");
        }
    }

    w.waitForMerges();
    w.close();
}