List of usage examples for org.apache.lucene.index IndexWriter forceMergeDeletes
public void forceMergeDeletes() throws IOException
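Before the project examples below, here is a minimal, self-contained sketch of the usual call pattern. It assumes a Lucene 5.x-style API; the index path "/tmp/example-index" and the "id" field are placeholders, not part of any of the projects listed: mark documents as deleted, call forceMergeDeletes() to merge the segments that hold deletions, then commit.

    // Minimal sketch (hypothetical path and field name). forceMergeDeletes()
    // merges segments containing deletions so the deleted documents are reclaimed.
    import java.nio.file.Paths;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ForceMergeDeletesExample {
        public static void main(String[] args) throws Exception {
            try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                writer.deleteDocuments(new Term("id", "42")); // mark a document as deleted
                writer.forceMergeDeletes();                   // merge away segments holding deletions (blocks until done)
                writer.commit();
            }
        }
    }

Note that forceMergeDeletes() waits for the merges to finish and can be expensive on large indexes, which is why several of the examples below run it only on explicit expunge or optimize requests.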
From source file:cn.hbu.cs.esearch.index.DiskLuceneIndexDataLoader.java
License:Apache License
public void expungeDeletes() throws IOException {
    log.info("expunging deletes...");
    synchronized (optimizeMonitor) {
        BaseSearchIndex<R> idx = getSearchIndex();
        IndexWriter writer = null;
        try {
            writer = idx.openIndexWriter(_analyzer, _similarity);
            writer.forceMergeDeletes();
        } finally {
            if (writer != null) {
                idx.closeIndexWriter();
            }
        }
        _idxMgr.refreshDiskReader();
    }
    log.info("deletes expunged");
}
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
public void deleteAll() {
    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
        indexWriter = new IndexWriter(this.getDirectory(), config);
        Long result = indexWriter.deleteAll();
        indexWriter.forceMergeDeletes();
        log.info("deleted:{}", result);
    } catch (Exception e) {
        e.printStackTrace();
        try {
            indexWriter.rollback();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    } finally {
        try {
            indexWriter.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }
}
From source file:com.orientechnologies.lucene.engine.OLuceneStorage.java
License:Apache License
public void commit() {
    try {
        OLogManager.instance().info(this, "committing");
        final IndexWriter indexWriter = mgrWriter.getIndexWriter();
        indexWriter.forceMergeDeletes();
        indexWriter.commit();
    } catch (IOException e) {
        OLogManager.instance().error(this, "Error on committing Lucene index", e);
    }
}
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testDeleteAfterOptimize() throws IOException {
    IndexWriter writer = getWriter();
    assertEquals(2, writer.numDocs());
    writer.deleteDocuments(new Term("id", "1"));
    writer.forceMergeDeletes();        // merge away the deleted document
    writer.commit();
    assertFalse(writer.hasDeletions());
    assertEquals(1, writer.maxDoc());  // deleted doc is gone from maxDoc too
    assertEquals(1, writer.numDocs());
    writer.close();
}
From source file:de.tudarmstadt.lt.lm.app.GenerateNgramIndex.java
License:Apache License
public void create_ngram_index(File ngram_joined_counts_file) throws IOException {
    File index_dir = new File(_index_dir, "ngram");
    if (index_dir.exists()) {
        LOG.info("Ngram index already exists in directory '{}'.", index_dir.getAbsolutePath());
        if (_overwrite) {
            LOG.info("Overwriting index '{}',", index_dir);
            index_dir.delete();
        } else
            return;
    }
    index_dir.mkdirs();
    Analyzer analyzer = new KeywordAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // use 80 percent of the available total memory
    double total_mem_mb = (double) Runtime.getRuntime().maxMemory() / 1e6;
    double percentage_ram_buffer = Properties.ramBufferPercentage();
    if (percentage_ram_buffer > 0) {
        double percentage_ram_buffer_mb = total_mem_mb * percentage_ram_buffer;
        LOG.info(String.format("Setting ram buffer size to %.2f MB (%.2f%% from %.2f MB)",
                percentage_ram_buffer_mb, percentage_ram_buffer * 100, total_mem_mb));
        iwc.setRAMBufferSizeMB(percentage_ram_buffer_mb);
    }
    Directory directory = new MMapDirectory(index_dir);
    IndexWriter writer_ngram = new IndexWriter(directory, iwc);

    InputStream in = new FileInputStream(ngram_joined_counts_file);
    if (ngram_joined_counts_file.getName().endsWith(".gz"))
        in = new GZIPInputStream(in);
    LineIterator iter = new LineIterator(new BufferedReader(new InputStreamReader(in, "UTF-8")));

    Document doc = new Document();
    Field f_ngram = new StringField("ngram", "", Store.YES);
    doc.add(f_ngram);
    Field f_n = new IntField("cardinality", 0, Store.YES);
    doc.add(f_n);
    Field f_word = new StringField("word", "", Store.YES);
    doc.add(f_word);
    Field f_hist = new StringField("history", "", Store.YES);
    doc.add(f_hist);
    Field f_lower = new StringField("lower", "", Store.YES);
    doc.add(f_lower);
    Field f_count = new StoredField("num", 0L);
    doc.add(f_count);
    Field[] f_follow = new Field[4];
    f_follow[0] = new StoredField("nf_s", 0L);
    doc.add(f_follow[0]);
    f_follow[1] = new StoredField("nf_N1", 0L);
    doc.add(f_follow[1]);
    f_follow[2] = new StoredField("nf_N2", 0L);
    doc.add(f_follow[2]);
    f_follow[3] = new StoredField("nf_N3", 0L);
    doc.add(f_follow[3]);
    Field[] f_precede = new Field[4];
    f_precede[0] = new StoredField("np_s", 0L);
    doc.add(f_precede[0]);
    f_precede[1] = new StoredField("np_N1", 0L);
    doc.add(f_precede[1]);
    f_precede[2] = new StoredField("np_N2", 0L);
    doc.add(f_precede[2]);
    f_precede[3] = new StoredField("np_N3", 0L);
    doc.add(f_precede[3]);
    Field[] f_followerprecede = new Field[4];
    f_followerprecede[0] = new StoredField("nfp_s", 0L);
    doc.add(f_followerprecede[0]);
    f_followerprecede[1] = new StoredField("nfp_N1", 0L);
    doc.add(f_followerprecede[1]);
    f_followerprecede[2] = new StoredField("nfp_N2", 0L);
    doc.add(f_followerprecede[2]);
    f_followerprecede[3] = new StoredField("nfp_N3", 0L);
    doc.add(f_followerprecede[3]);

    Long[][] N = new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } };
    Long[] S = new Long[] { 0L };
    long c = 0;
    while (iter.hasNext()) {
        if (++c % 100000 == 0)
            LOG.info("Adding {}'th ngram.", c);
        String line = iter.next();
        try {
            String[] splits = de.tudarmstadt.lt.utilities.StringUtils.rtrim(line).split("\t");
            String ngram_str = splits[0];
            if (de.tudarmstadt.lt.utilities.StringUtils.trim(ngram_str).isEmpty()) {
                LOG.warn("Ngram is empty, skipping line {}: '{}' (file '{}').", c, line, ngram_joined_counts_file);
                continue;
            }
            List<String> ngram = Arrays.asList(ngram_str.split(" "));
            long num = Long.parseLong(splits[1]);
            int n = ngram.size();

            f_ngram.setStringValue(ngram_str);
            f_n.setIntValue(n);
            f_word.setStringValue(ngram.get(ngram.size() - 1));
            f_hist.setStringValue(StringUtils.join(ngram.subList(0, ngram.size() - 1), " "));
            f_lower.setStringValue(StringUtils.join(ngram.subList(1, ngram.size()), " "));
            f_count.setLongValue(num);

            for (int j = 0; j < f_follow.length; j++) {
                f_follow[j].setLongValue(0L);
                f_precede[j].setLongValue(0L);
                f_followerprecede[j].setLongValue(0L);
            }

            if (splits.length > 2 && !splits[2].isEmpty()) {
                // precede or follow or followerprecede
                String[] splits_ = splits[2].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }
            if (splits.length > 3 && !splits[3].isEmpty()) {
                // should be follow or followerprecede
                String[] splits_ = splits[3].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }
            if (splits.length > 4 && !splits[4].isEmpty()) {
                // should be followerprecede
                String[] splits_ = splits[4].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }

            writer_ngram.addDocument(doc);

            while (N.length <= n) {
                N = ArrayUtils.getConcatinatedArray(N, new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } });
                S = ArrayUtils.getConcatinatedArray(S, new Long[] { 0L });
            }
            if (num == 1L)
                N[n][1]++;
            else if (num == 2L)
                N[n][2]++;
            else if (num == 3L)
                N[n][3]++;
            else if (num == 4L)
                N[n][4]++;
            else
                N[n][5]++;
            N[n][0]++;
            S[n] += num;
        } catch (Exception e) {
            LOG.error("Could not process line '{}' in file '{}:{}', malformed line.", line, ngram_joined_counts_file, c, e);
        }
    }

    writer_ngram.forceMergeDeletes();
    writer_ngram.commit();
    writer_ngram.close();

    StringBuilder b = new StringBuilder(String.format(
            "#%n# Number of times where an ngram occurred: %n# at_least_once, exactly_once, exactly_twice, exactly_three_times, exactly_four_times, five_times_or_more.%n#%nmax_n=%d%nmax_c=6%n",
            N.length - 1));
    for (int n = 1; n < N.length; n++)
        b.append(String.format("n%d=%s%n", n, StringUtils.join(N[n], ',')));
    for (int n = 1; n < S.length; n++)
        b.append(String.format("s%d=%d%n", n, S[n]));
    FileUtils.writeStringToFile(new File(_index_dir, "__sum_ngrams__"), b.toString());
}
From source file:de.tudarmstadt.lt.lm.app.GenerateNgramIndex.java
License:Apache License
public void create_vocabulary_index(File vocabulary_file) throws IOException {
    File index_dir = new File(_index_dir, "vocab");
    if (index_dir.exists()) {
        LOG.info("Vocabulary index already exists in directory '{}'.", index_dir.getAbsolutePath());
        if (_overwrite) {
            LOG.info("Overwriting index '{}',", index_dir);
            index_dir.delete();
        } else
            return;
    }
    index_dir.mkdirs();
    Analyzer analyzer = new KeywordAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(1024.0);
    Directory directory = new MMapDirectory(index_dir);
    IndexWriter writer_vocab = new IndexWriter(directory, iwc);

    InputStream in = new FileInputStream(vocabulary_file);
    if (vocabulary_file.getName().endsWith(".gz"))
        in = new GZIPInputStream(in);
    LineIterator iter = new LineIterator(new BufferedReader(new InputStreamReader(in, "UTF-8")));
    Document doc = new Document();
    Field f_word = new StringField("word", "", Field.Store.YES);
    doc.add(f_word);
    long c = 0;
    while (iter.hasNext()) {
        if (++c % 10000 == 0)
            LOG.info("Adding {}'th word.", c);
        String line = iter.next();
        try {
            String word = line.trim();
            f_word.setStringValue(word);
            writer_vocab.addDocument(doc);
        } catch (Exception e) {
            LOG.warn("Could not process line '{}' in file '{}', malformed line.", line, vocabulary_file, e);
        }
    }
    writer_vocab.forceMergeDeletes();
    writer_vocab.commit();
    writer_vocab.close();
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexTest.java
License:Apache License
private void purgeDeletedDocs(NodeBuilder idx, IndexDefinition definition) throws IOException {
    IndexWriter writer = new IndexWriter(newIndexDirectory(definition, idx),
            getIndexWriterConfig(definition, true));
    writer.forceMergeDeletes();
    writer.close();
}
From source file:org.apache.solr.update.DirectUpdateHandler2.java
License:Apache License
@Override
public void commit(CommitUpdateCommand cmd) throws IOException {
    if (cmd.prepareCommit) {
        prepareCommit(cmd);
        return;
    }

    if (cmd.optimize) {
        optimizeCommands.incrementAndGet();
    } else {
        commitCommands.incrementAndGet();
        if (cmd.expungeDeletes)
            expungeDeleteCommands.incrementAndGet();
    }

    Future[] waitSearcher = null;
    if (cmd.waitSearcher) {
        waitSearcher = new Future[1];
    }

    boolean error = true;
    try {
        // only allow one hard commit to proceed at once
        if (!cmd.softCommit) {
            solrCoreState.getCommitLock().lock();
        }

        log.info("start " + cmd);

        // We must cancel pending commits *before* we actually execute the commit.
        if (cmd.openSearcher) {
            // we can cancel any pending soft commits if this commit will open a new searcher
            softCommitTracker.cancelPendingCommit();
        }
        if (!cmd.softCommit && (cmd.openSearcher || !commitTracker.getOpenSearcher())) {
            // cancel a pending hard commit if this commit is of equal or greater "strength"...
            // If the autoCommit has openSearcher=true, then this commit must have openSearcher=true
            // to cancel.
            commitTracker.cancelPendingCommit();
        }

        RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
        try {
            IndexWriter writer = iw.get();
            if (cmd.optimize) {
                writer.forceMerge(cmd.maxOptimizeSegments);
            } else if (cmd.expungeDeletes) {
                writer.forceMergeDeletes();
            }

            if (!cmd.softCommit) {
                synchronized (solrCoreState.getUpdateLock()) {
                    // sync is currently needed to prevent preCommit
                    // from being called between preSoft and
                    // postSoft... see postSoft comments.
                    if (ulog != null)
                        ulog.preCommit(cmd);
                }

                // SolrCore.verbose("writer.commit() start writer=",writer);
                if (writer.hasUncommittedChanges()) {
                    final Map<String, String> commitData = new HashMap<String, String>();
                    commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY,
                            String.valueOf(System.currentTimeMillis()));
                    writer.setCommitData(commitData);
                    writer.commit();
                } else {
                    log.info("No uncommitted changes. Skipping IW.commit.");
                }
                // SolrCore.verbose("writer.commit() end");

                numDocsPending.set(0);
                callPostCommitCallbacks();
            } else {
                callPostSoftCommitCallbacks();
            }
        } finally {
            iw.decref();
        }

        if (cmd.optimize) {
            callPostOptimizeCallbacks();
        }

        if (cmd.softCommit) {
            // ulog.preSoftCommit();
            synchronized (solrCoreState.getUpdateLock()) {
                if (ulog != null)
                    ulog.preSoftCommit(cmd);
                core.getSearcher(true, false, waitSearcher, true);
                if (ulog != null)
                    ulog.postSoftCommit(cmd);
            }
            // ulog.postSoftCommit();
        } else {
            synchronized (solrCoreState.getUpdateLock()) {
                if (ulog != null)
                    ulog.preSoftCommit(cmd);
                if (cmd.openSearcher) {
                    core.getSearcher(true, false, waitSearcher);
                } else {
                    // force open a new realtime searcher so realtime-get and versioning code can see the latest
                    RefCounted<SolrIndexSearcher> searchHolder = core.openNewSearcher(true, true);
                    searchHolder.decref();
                }
                if (ulog != null)
                    ulog.postSoftCommit(cmd);
            }
            if (ulog != null)
                ulog.postCommit(cmd); // postCommit currently means new searcher has
                                      // also been opened
        }

        // reset commit tracking
        if (cmd.softCommit) {
            softCommitTracker.didCommit();
        } else {
            commitTracker.didCommit();
        }

        log.info("end_commit_flush");

        error = false;
    } finally {
        if (!cmd.softCommit) {
            solrCoreState.getCommitLock().unlock();
        }

        addCommands.set(0);
        deleteByIdCommands.set(0);
        deleteByQueryCommands.set(0);
        if (error)
            numErrors.incrementAndGet();
    }

    // if we are supposed to wait for the searcher to be registered, then we should do it
    // outside any synchronized block so that other update operations can proceed.
    if (waitSearcher != null && waitSearcher[0] != null) {
        try {
            waitSearcher[0].get();
        } catch (InterruptedException e) {
            SolrException.log(log, e);
        } catch (ExecutionException e) {
            SolrException.log(log, e);
        }
    }
}
From source file:org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java
License:Apache License
public void testInvalidateEntries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    storeQuery("0", indexWriter, termQuery("a", "0"), true, false);
    indexWriter.flush();
    storeQuery("1", indexWriter, termQuery("a", "1"), true, false);
    indexWriter.flush();
    storeQuery("2", indexWriter, termQuery("a", "2"), true, false);
    indexWriter.flush();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(3));
    assertThat(indexReader.maxDoc(), equalTo(3));

    initialize("a", "type=keyword");

    IndexShard indexShard = mockIndexShard(Version.CURRENT, false);
    ThreadPool threadPool = mockThreadPool();
    IndexWarmer.Listener listener = cache.createListener(threadPool);
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(3L));

    PercolateQuery.QueryRegistry.Leaf leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));
    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "1"))));
    leaf = cache.getQueries(indexReader.leaves().get(2));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "2"))));

    // change merge policy, so that merges will actually happen:
    indexWriter.getConfig().setMergePolicy(new TieredMergePolicy());
    indexWriter.deleteDocuments(new Term("id", "1"));
    indexWriter.forceMergeDeletes();
    indexReader.close();
    indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(2));
    assertThat(indexReader.maxDoc(), equalTo(2));
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(2L));

    leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));
    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "2"))));

    indexWriter.forceMerge(1);
    indexReader.close();
    indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(2));
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(2L));

    leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));
    assertThat(leaf.getQuery(1), equalTo(new TermQuery(new Term("a", "2"))));

    indexWriter.close();
    indexReader.close();
    directory.close();
}
From source file:org.eu.bitzone.Leia.java
License:Apache License
/**
 * Optimize the index.
 */
public void optimize(final Object dialog) {
    final Thread t = new Thread() {
        @Override
        public void run() {
            IndexWriter iw = null;
            final Object optimizeButton = find(dialog, "optimizeButton");
            setBoolean(optimizeButton, "enabled", false);
            final Object closeButton = find(dialog, "closeButton");
            setBoolean(closeButton, "enabled", false);
            final Object msg = find(dialog, "msg");
            final Object stat = find(dialog, "stat");
            setString(stat, "text", "Running ...");
            final PanelPrintWriter ppw = new PanelPrintWriter(Leia.this, msg);
            final boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
            final boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
            final boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
            final boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
            final Object tiiSpin = find(dialog, "tii");
            final Object segnumSpin = find(dialog, "segnum");
            final int tii = Integer.parseInt(getString(tiiSpin, "text"));
            final int segnum = Integer.parseInt(getString(segnumSpin, "text"));
            try {
                if (is != null) {
                    is = null;
                }
                if (ir != null) {
                    ir.close();
                }
                if (ar != null) {
                    ar.close();
                }
                IndexDeletionPolicy policy;
                if (keep) {
                    policy = new KeepAllIndexDeletionPolicy();
                } else {
                    policy = new KeepLastIndexDeletionPolicy();
                }
                final IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
                if (!useLast) {
                    final IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
                    if (ic != null) {
                        cfg.setIndexCommit(ic);
                    }
                }
                cfg.setIndexDeletionPolicy(policy);
                cfg.setTermIndexInterval(tii);
                final MergePolicy p = cfg.getMergePolicy();
                cfg.setUseCompoundFile(useCompound);
                if (useCompound) {
                    p.setNoCFSRatio(1.0);
                }
                cfg.setInfoStream(ppw);
                iw = new IndexWriter(dir, cfg);
                final long startSize = Util.calcTotalFileSize(pName, dir);
                final long startTime = System.currentTimeMillis();
                if (expunge) {
                    iw.forceMergeDeletes();
                } else {
                    if (segnum > 1) {
                        iw.forceMerge(segnum, true);
                    } else {
                        iw.forceMerge(1, true);
                    }
                }
                iw.commit();
                final long endTime = System.currentTimeMillis();
                final long endSize = Util.calcTotalFileSize(pName, dir);
                final long deltaSize = startSize - endSize;
                final String sign = deltaSize < 0 ? " Increased " : " Reduced ";
                final String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
                        + Util.normalizeUnit(Math.abs(deltaSize));
                final String timeMsg = String.valueOf(endTime - startTime) + " ms";
                showStatus(sizeMsg + " in " + timeMsg);
                iw.close();
                setString(stat, "text", "Finished OK.");
            } catch (final Exception e) {
                e.printStackTrace(ppw);
                setString(stat, "text", "ERROR - aborted.");
                errorMsg("ERROR optimizing: " + e.toString());
                if (iw != null) {
                    try {
                        iw.close();
                    } catch (final Exception e1) {
                    }
                }
            } finally {
                setBoolean(closeButton, "enabled", true);
            }
            try {
                actionReopen();
                is = new IndexSearcher(ir);
                // add dialog again
                add(dialog);
            } catch (final Exception e) {
                e.printStackTrace(ppw);
                errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
            }
        }
    };
    t.start();
}