List of usage examples for org.apache.lucene.index.IndexWriterConfig.setUseCompoundFile
@Override public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile)
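Before the project examples below, a minimal, self-contained sketch of the call in isolation. It targets the Lucene 5.x+ API (no-arg StandardAnalyzer, Path-based FSDirectory.open); the index path and class name are illustrative and not taken from any project listed here.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetUseCompoundFileExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      // false: newly flushed segments are written as separate per-format files
      // instead of being packed into a single .cfs/.cfe pair.
      iwc.setUseCompoundFile(false);
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        Document doc = new Document();
        doc.add(new StringField("id", "1", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();
      }
    }
  }
}

Disabling the compound format trades more open files for faster flushing; note that this setting only applies to newly flushed segments, as several of the examples below rely on.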
From source file: org.elasticsearch.index.store.StoreTest.java
License: Apache License
@Test
public void testRecoveryDiff() throws IOException, InterruptedException {
  int numDocs = 2 + random().nextInt(100);
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
    docs.add(doc);
  }
  long seed = random().nextLong();
  Store.MetadataSnapshot first;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setMaxThreadStates(1);
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    Store store = new Store(shardId, ImmutableSettings.EMPTY, directoryService,
        randomDistributor(random, directoryService), new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.close();
    first = store.getMetadata();
    assertDeleteContent(store, directoryService);
    store.close();
  }
  long time = new Date().getTime();
  while (time == new Date().getTime()) {
    Thread.sleep(10); // bump the time
  }
  Store.MetadataSnapshot second;
  Store store;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setMaxThreadStates(1);
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    store = new Store(shardId, ImmutableSettings.EMPTY, directoryService,
        randomDistributor(random, directoryService), new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.close();
    second = store.getMetadata();
  }
  Store.RecoveryDiff diff = first.recoveryDiff(second);
  assertThat(first.size(), equalTo(second.size()));
  for (StoreFileMetaData md : first) {
    assertThat(second.get(md.name()), notNullValue());
    // si files are different - containing timestamps etc
    assertThat(second.get(md.name()).isSame(md), equalTo(md.name().endsWith(".si") == false));
  }
  assertThat(diff.different.size(), equalTo(first.size() - 1));
  assertThat(diff.identical.size(), equalTo(1)); // commit point is identical
  assertThat(diff.missing, empty());
  // check the self diff
  Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
  assertThat(selfDiff.identical.size(), equalTo(first.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());
  // lets add some deletes
  Random random = new Random(seed);
  IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
      .setCodec(actualDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(random.nextBoolean());
  iwc.setMaxThreadStates(1);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  IndexWriter writer = new IndexWriter(store.directory(), iwc);
  writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
  writer.close();
  Store.MetadataSnapshot metadata = store.getMetadata();
  StoreFileMetaData delFile = null;
  for (StoreFileMetaData md : metadata) {
    if (md.name().endsWith(".del")) {
      delFile = md;
      break;
    }
  }
  Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
  if (delFile != null) {
    assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(2));
  } else {
    // an entire segment must be missing (single doc segment got dropped)
    assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different
  }
  // check the self diff
  selfDiff = metadata.recoveryDiff(metadata);
  assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());
  // add a new commit
  iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodec(actualDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files
  iwc.setMaxThreadStates(1);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  writer = new IndexWriter(store.directory(), iwc);
  writer.addDocument(docs.get(0));
  writer.close();
  Store.MetadataSnapshot newCommitMetaData = store.getMetadata();
  Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
  if (delFile != null) {
    assertThat(newCommitDiff.identical.size(),
        equalTo(newCommitMetaData.size() - 5)); // segments_N, del file, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different
    assertThat(newCommitDiff.different.get(0).name(), endsWith(".del"));
    assertThat(newCommitDiff.missing.size(), equalTo(4)); // segments_N, cfs, cfe, si for the new segment
  } else {
    assertThat(newCommitDiff.identical.size(),
        equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.different.size(), equalTo(0));
    assertThat(newCommitDiff.missing.size(),
        equalTo(4)); // an entire segment must be missing (single doc segment got dropped) plus the commit is different
  }
  deleteContent(store.directory());
  IOUtils.close(store);
}
From source file: org.elasticsearch.index.store.StoreTests.java
License: Apache License
@Test
public void testRecoveryDiff() throws IOException, InterruptedException {
  int numDocs = 2 + random().nextInt(100);
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
    docs.add(doc);
  }
  long seed = random().nextLong();
  Store.MetadataSnapshot first;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random))
        .setCodec(TestUtil.getDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    first = store.getMetadata();
    assertDeleteContent(store, directoryService);
    store.close();
  }
  long time = new Date().getTime();
  while (time == new Date().getTime()) {
    Thread.sleep(10); // bump the time
  }
  Store.MetadataSnapshot second;
  Store store;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random))
        .setCodec(TestUtil.getDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    second = store.getMetadata();
  }
  Store.RecoveryDiff diff = first.recoveryDiff(second);
  assertThat(first.size(), equalTo(second.size()));
  for (StoreFileMetaData md : first) {
    assertThat(second.get(md.name()), notNullValue());
    // si files are different - containing timestamps etc
    assertThat(second.get(md.name()).isSame(md), equalTo(false));
  }
  assertThat(diff.different.size(), equalTo(first.size()));
  assertThat(diff.identical.size(), equalTo(0)); // in lucene 5 nothing is identical - we use random ids in file headers
  assertThat(diff.missing, empty());
  // check the self diff
  Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
  assertThat(selfDiff.identical.size(), equalTo(first.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());
  // lets add some deletes
  Random random = new Random(seed);
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random))
      .setCodec(TestUtil.getDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(random.nextBoolean());
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  IndexWriter writer = new IndexWriter(store.directory(), iwc);
  writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
  writer.commit();
  writer.close();
  Store.MetadataSnapshot metadata = store.getMetadata();
  StoreFileMetaData delFile = null;
  for (StoreFileMetaData md : metadata) {
    if (md.name().endsWith(".liv")) {
      delFile = md;
      break;
    }
  }
  Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
  if (delFile != null) {
    assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(2));
  } else {
    // an entire segment must be missing (single doc segment got dropped)
    assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different
  }
  // check the self diff
  selfDiff = metadata.recoveryDiff(metadata);
  assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());
  // add a new commit
  iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  writer = new IndexWriter(store.directory(), iwc);
  writer.addDocument(docs.get(0));
  writer.close();
  Store.MetadataSnapshot newCommitMetaData = store.getMetadata();
  Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
  if (delFile != null) {
    assertThat(newCommitDiff.identical.size(),
        equalTo(newCommitMetaData.size() - 5)); // segments_N, del file, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different
    assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv"));
    assertThat(newCommitDiff.missing.size(), equalTo(4)); // segments_N, cfs, cfe, si for the new segment
  } else {
    assertThat(newCommitDiff.identical.size(),
        equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.different.size(), equalTo(0));
    assertThat(newCommitDiff.missing.size(),
        equalTo(4)); // an entire segment must be missing (single doc segment got dropped) plus the commit is different
  }
  deleteContent(store.directory());
  IOUtils.close(store);
}
From source file: org.eu.bitzone.Leia.java
License: Apache License
private IndexWriter createIndexWriter() {
  try {
    final IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
    IndexDeletionPolicy policy;
    if (keepCommits) {
      policy = new KeepAllIndexDeletionPolicy();
    } else {
      policy = new KeepLastIndexDeletionPolicy();
    }
    cfg.setIndexDeletionPolicy(policy);
    final MergePolicy mp = cfg.getMergePolicy();
    cfg.setUseCompoundFile(IndexGate.preferCompoundFormat(dir));
    final IndexWriter iw = new IndexWriter(dir, cfg);
    return iw;
  } catch (final Exception e) {
    errorMsg("Error creating IndexWriter: " + e.toString());
    return null;
  }
}
From source file: org.eu.bitzone.Leia.java
License: Apache License
/** Optimize the index. */
public void optimize(final Object dialog) {
  final Thread t = new Thread() {
    @Override
    public void run() {
      IndexWriter iw = null;
      final Object optimizeButton = find(dialog, "optimizeButton");
      setBoolean(optimizeButton, "enabled", false);
      final Object closeButton = find(dialog, "closeButton");
      setBoolean(closeButton, "enabled", false);
      final Object msg = find(dialog, "msg");
      final Object stat = find(dialog, "stat");
      setString(stat, "text", "Running ...");
      final PanelPrintWriter ppw = new PanelPrintWriter(Leia.this, msg);
      final boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
      final boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
      final boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
      final boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
      final Object tiiSpin = find(dialog, "tii");
      final Object segnumSpin = find(dialog, "segnum");
      final int tii = Integer.parseInt(getString(tiiSpin, "text"));
      final int segnum = Integer.parseInt(getString(segnumSpin, "text"));
      try {
        if (is != null) {
          is = null;
        }
        if (ir != null) {
          ir.close();
        }
        if (ar != null) {
          ar.close();
        }
        IndexDeletionPolicy policy;
        if (keep) {
          policy = new KeepAllIndexDeletionPolicy();
        } else {
          policy = new KeepLastIndexDeletionPolicy();
        }
        final IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
        if (!useLast) {
          final IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
          if (ic != null) {
            cfg.setIndexCommit(ic);
          }
        }
        cfg.setIndexDeletionPolicy(policy);
        cfg.setTermIndexInterval(tii);
        final MergePolicy p = cfg.getMergePolicy();
        cfg.setUseCompoundFile(useCompound);
        if (useCompound) {
          p.setNoCFSRatio(1.0);
        }
        cfg.setInfoStream(ppw);
        iw = new IndexWriter(dir, cfg);
        final long startSize = Util.calcTotalFileSize(pName, dir);
        final long startTime = System.currentTimeMillis();
        if (expunge) {
          iw.forceMergeDeletes();
        } else {
          if (segnum > 1) {
            iw.forceMerge(segnum, true);
          } else {
            iw.forceMerge(1, true);
          }
        }
        iw.commit();
        final long endTime = System.currentTimeMillis();
        final long endSize = Util.calcTotalFileSize(pName, dir);
        final long deltaSize = startSize - endSize;
        final String sign = deltaSize < 0 ? " Increased " : " Reduced ";
        final String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
            + Util.normalizeUnit(Math.abs(deltaSize));
        final String timeMsg = String.valueOf(endTime - startTime) + " ms";
        showStatus(sizeMsg + " in " + timeMsg);
        iw.close();
        setString(stat, "text", "Finished OK.");
      } catch (final Exception e) {
        e.printStackTrace(ppw);
        setString(stat, "text", "ERROR - aborted.");
        errorMsg("ERROR optimizing: " + e.toString());
        if (iw != null) {
          try {
            iw.close();
          } catch (final Exception e1) {
          }
        }
      } finally {
        setBoolean(closeButton, "enabled", true);
      }
      try {
        actionReopen();
        is = new IndexSearcher(ir);
        // add dialog again
        add(dialog);
      } catch (final Exception e) {
        e.printStackTrace(ppw);
        errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
      }
    }
  };
  t.start();
}
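A note on the p.setNoCFSRatio(1.0) call paired with cfg.setUseCompoundFile(useCompound) above: IndexWriterConfig.setUseCompoundFile only governs newly flushed segments, while the merge policy's no-CFS ratio decides whether segments produced by merges are packed into compound files. A minimal sketch of forcing the compound format for both (Lucene 5.x-style constructor shown; in 4.x pass a Version, as in the example above):

IndexWriterConfig cfg = new IndexWriterConfig(new StandardAnalyzer());
cfg.setUseCompoundFile(true);             // newly flushed segments use .cfs/.cfe
cfg.getMergePolicy().setNoCFSRatio(1.0);  // merged segments use .cfs/.cfe as well
// (a ratio of 0.0 would keep merged segments out of compound files)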
From source file: org.getopt.luke.Luke.java
License: Apache License
private IndexWriter createIndexWriter() {
  try {
    IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
    IndexDeletionPolicy policy;
    if (keepCommits) {
      policy = new KeepAllIndexDeletionPolicy();
    } else {
      policy = new KeepLastIndexDeletionPolicy();
    }
    cfg.setIndexDeletionPolicy(policy);
    cfg.setUseCompoundFile(IndexGate.preferCompoundFormat(dir));
    IndexWriter iw = new IndexWriter(dir, cfg);
    return iw;
  } catch (Exception e) {
    errorMsg("Error creating IndexWriter: " + e.toString());
    return null;
  }
}
From source file: org.getopt.luke.Luke.java
License: Apache License
/** Optimize the index. */
public void optimize(final Object dialog) {
  Thread t = new Thread() {
    public void run() {
      IndexWriter iw = null;
      Object optimizeButton = find(dialog, "optimizeButton");
      setBoolean(optimizeButton, "enabled", false);
      Object closeButton = find(dialog, "closeButton");
      setBoolean(closeButton, "enabled", false);
      Object msg = find(dialog, "msg");
      Object stat = find(dialog, "stat");
      setString(stat, "text", "Running ...");
      PanelPrintWriter ppw = new PanelPrintWriter(Luke.this, msg);
      boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
      boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
      boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
      boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
      Object tiiSpin = find(dialog, "tii");
      Object segnumSpin = find(dialog, "segnum");
      int tii = Integer.parseInt(getString(tiiSpin, "text"));
      int segnum = Integer.parseInt(getString(segnumSpin, "text"));
      try {
        if (is != null) {
          is = null;
        }
        if (ir != null) {
          ir.close();
        }
        if (ar != null) {
          ar.close();
        }
        IndexDeletionPolicy policy;
        if (keep) {
          policy = new KeepAllIndexDeletionPolicy();
        } else {
          policy = new KeepLastIndexDeletionPolicy();
        }
        IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
        if (!useLast) {
          IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
          if (ic != null) {
            cfg.setIndexCommit(ic);
          }
        }
        cfg.setIndexDeletionPolicy(policy);
        cfg.setTermIndexInterval(tii);
        cfg.setUseCompoundFile(useCompound);
        cfg.setInfoStream(ppw);
        iw = new IndexWriter(dir, cfg);
        long startSize = Util.calcTotalFileSize(pName, dir);
        long startTime = System.currentTimeMillis();
        if (expunge) {
          iw.forceMergeDeletes();
        } else {
          if (segnum > 1) {
            iw.forceMerge(segnum, true);
          } else {
            iw.forceMerge(1, true);
          }
        }
        iw.commit();
        long endTime = System.currentTimeMillis();
        long endSize = Util.calcTotalFileSize(pName, dir);
        long deltaSize = startSize - endSize;
        String sign = deltaSize < 0 ? " Increased " : " Reduced ";
        String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
            + Util.normalizeUnit(Math.abs(deltaSize));
        String timeMsg = String.valueOf(endTime - startTime) + " ms";
        showStatus(sizeMsg + " in " + timeMsg);
        iw.close();
        setString(stat, "text", "Finished OK.");
      } catch (Exception e) {
        e.printStackTrace(ppw);
        setString(stat, "text", "ERROR - aborted.");
        errorMsg("ERROR optimizing: " + e.toString());
        if (iw != null) {
          try {
            iw.close();
          } catch (Exception e1) {
          }
        }
      } finally {
        setBoolean(closeButton, "enabled", true);
      }
      try {
        actionReopen();
        is = new IndexSearcher(ir);
        // add dialog again
        add(dialog);
      } catch (Exception e) {
        e.printStackTrace(ppw);
        errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
      }
    }
  };
  t.start();
}
From source file: org.languagetool.dev.FrequencyIndexCreator.java
License: Open Source License
private void run(File inputDir, File indexBaseDir) throws IOException {
  List<File> files = Arrays.asList(inputDir.listFiles());
  Collections.sort(files);
  for (File file : files) {
    String name = file.getName();
    if (name.matches(".*_[A-Z]+_.*")) {
      System.out.println("Skipping POS tag file " + name);
      continue;
    }
    File indexDir;
    boolean hiveMode;
    if (name.matches(NAME_REGEX1)) {
      indexDir = new File(indexBaseDir, name.replaceAll(NAME_REGEX1, "$1"));
      hiveMode = false;
      System.out.println("Running in corpus mode (i.e. aggregation of years)");
    } else if (name.matches(NAME_REGEX2)) {
      indexDir = new File(indexBaseDir, name.replaceAll(NAME_REGEX2, "$1"));
      hiveMode = true;
      System.out.println("Running in Hive mode (i.e. no aggregation of years)");
    } else {
      System.out.println("Skipping " + name + " - doesn't match regex " + NAME_REGEX1 + " or " + NAME_REGEX2);
      continue;
    }
    if (indexDir.exists() && indexDir.isDirectory()) {
      System.out.println("Skipping " + name + " - index dir '" + indexDir + "' already exists");
      continue;
    }
    System.out.println("Index dir: " + indexDir);
    Directory directory = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
    config.setUseCompoundFile(false); // ~10% speedup
    //config.setRAMBufferSizeMB(1000);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
      indexLinesFromGoogleFile(writer, file, hiveMode);
    }
  }
}
From source file: org.meresco.lucene.numerate.UriEnumerate.java
License: Open Source License
/**
 * @param path
 * @param max_cache_size
 * @param withTransactionLog allows for crash recovery, but slows down UriNumerate considerably because of file system flush.
 * @throws IOException
 */
public UriEnumerate(String path, int max_cache_size, boolean withTransactionLog) throws IOException {
  IndexWriterConfig config = new IndexWriterConfig(null);
  ConcurrentMergeScheduler ms = (ConcurrentMergeScheduler) config.getMergeScheduler();
  ms.setDefaultMaxMergesAndThreads(/* spins= */ false);
  LogDocMergePolicy mp = new LogDocMergePolicy();
  mp.setMergeFactor(2);
  mp.setMinMergeDocs(max_cache_size);
  config.setMergePolicy(mp);
  config.setCodec(new Lucene60Codec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return new BloomFilteringPostingsFormat(super.getPostingsFormatForField(field));
    }
  });
  config.setUseCompoundFile(false);
  this.writer = new IndexWriter(FSDirectory.open(FileSystems.getDefault().getPath(path)), config);
  this.next_ord = writer.numDocs() + 1;
  this.searcher = new SimpleSearcher(this.writer);
  this.cache = new Cache(max_cache_size, () -> this.commit());
  this.transactionLog = new TransactionLog(withTransactionLog ? path + "/transactionLog" : null);
  this.transactionLog.maybeRecover();
}
From source file: org.neo4j.kernel.api.impl.index.IndexWriterConfigs.java
License: Open Source License
public static IndexWriterConfig standard() {
  IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneDataSource.KEYWORD_ANALYZER);
  writerConfig.setMaxBufferedDocs(MAX_BUFFERED_DOCS);
  writerConfig.setIndexDeletionPolicy(new MultipleBackupDeletionPolicy());
  writerConfig.setUseCompoundFile(true);
  writerConfig.setCodec(new Lucene54Codec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      PostingsFormat postingFormat = super.getPostingsFormatForField(field);
      return CODEC_BLOCK_TREE_ORDS_POSTING_FORMAT ? blockTreeOrdsPostingsFormat : postingFormat;
    }
  });
  LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
  mergePolicy.setNoCFSRatio(MERGE_POLICY_NO_CFS_RATIO);
  mergePolicy.setMinMergeMB(MERGE_POLICY_MIN_MERGE_MB);
  mergePolicy.setMergeFactor(MERGE_POLICY_MERGE_FACTOR);
  writerConfig.setMergePolicy(mergePolicy);
  return writerConfig;
}
From source file: perf.IDPerfTest.java
License: Apache License
private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
    final int maxTermsInBlock) throws IOException {
  System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
  Directory dir = FSDirectory.open(new File(indexPath));
  //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
  iwc.setMergeScheduler(new SerialMergeScheduler());
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  // So I can walk the files and get the *.tip sizes:
  iwc.setUseCompoundFile(false);
  iwc.setCodec(new Lucene53Codec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
    }
  });
  // 7/7/7 segment structure:
  iwc.setMaxBufferedDocs(ID_COUNT / 777);
  iwc.setRAMBufferSizeMB(-1);
  //iwc.setInfoStream(new PrintStreamInfoStream(System.out));
  //iwc.setMergePolicy(new LogDocMergePolicy());
  ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
  ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
  //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
  iwc.getMergePolicy().setNoCFSRatio(0.0);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
  ft.setTokenized(true);
  ft.freeze();
  BytesRef idValue = new BytesRef(64);
  Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
  doc.add(idField);
  long t0 = System.nanoTime();
  BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
  Random random = new Random(17);
  int lookupCount = 0;
  double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
  for (int i = 0; i < ID_COUNT; i++) {
    ids.next(idValue);
    if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
      lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
    }
    // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
    w.addDocument(doc);
  }
  if (lookupCount < lookupIDs.length) {
    throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
  }
  long indexTime = System.nanoTime() - t0;
  System.out.println("  indexing done; waitForMerges...");
  w.waitForMerges();
  IndexReader r = DirectoryReader.open(w, true);
  System.out.println("  reader=" + r);
  shuffle(random, lookupIDs);
  shuffle(random, lookupIDs);
  long bestTime = Long.MAX_VALUE;
  long checksum = 0;
  List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
  // Sort largest to smallest:
  Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
    @Override
    public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
      return c2.reader().maxDoc() - c1.reader().maxDoc();
    }
  });
  TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
  DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
  int[] docBases = new int[leaves.size()];
  for (int i = 0; i < leaves.size(); i++) {
    //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
    termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
    docBases[i] = leaves.get(i).docBase;
  }
  long rawLookupCount = 0;
  int countx = 0;
  for (int iter = 0; iter < 5; iter++) {
    t0 = System.nanoTime();
    BlockTreeTermsReader.seekExactFastNotFound = 0;
    BlockTreeTermsReader.seekExactFastRootNotFound = 0;
    rawLookupCount = 0;
    for (BytesRef id : lookupIDs) {
      if (countx++ < 50) {
        System.out.println("  id=" + id);
      }
      boolean found = false;
      for (int seg = 0; seg < termsEnums.length; seg++) {
        rawLookupCount++;
        if (termsEnums[seg].seekExact(id)) {
          docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
          int docID = docsEnums[seg].nextDoc();
          if (docID == DocsEnum.NO_MORE_DOCS) {
            // uh-oh!
            throw new RuntimeException("id not found: " + id);
          }
          // paranoia:
          checksum += docID + docBases[seg];
          found = true;
          // Optimization vs MultiFields: we don't need to check any more segments since id is PK
          break;
        }
      }
      if (found == false) {
        // uh-oh!
        throw new RuntimeException("id not found: " + id);
      }
    }
    long lookupTime = System.nanoTime() - t0;
    System.out.println(String.format(Locale.ROOT, "  iter=" + iter + " lookupTime=%.3f sec",
        lookupTime / 1000000000.0));
    if (lookupTime < bestTime) {
      bestTime = lookupTime;
      System.out.println("  **");
    }
  }
  long totalBytes = 0;
  long termsIndexTotalBytes = 0;
  for (String fileName : dir.listAll()) {
    long bytes = dir.fileLength(fileName);
    totalBytes += bytes;
    if (fileName.endsWith(".tip")) {
      termsIndexTotalBytes += bytes;
    }
  }
  r.close();
  w.rollback();
  dir.close();
  return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
      totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
      BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}