List of usage examples for org.apache.lucene.index.IndexWriter#deleteUnusedFiles
public synchronized void deleteUnusedFiles() throws IOException
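This method asks the writer to remove index files that are no longer referenced by any commit point or open reader. IndexWriter normally deletes unused files on its own during indexing, so an explicit call mainly matters after releasing a snapshot or closing readers, notably on Windows, where open files cannot be deleted. Before the project examples below, here is a minimal self-contained sketch of the call pattern, written against the Lucene 5+ API (the synchronized signature shown above is from the older 3.x/4.x line); the index path and field names are illustrative assumptions, not taken from any of the listed sources:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DeleteUnusedFilesSketch {
    public static void main(String[] args) throws IOException {
        try (Directory dir = FSDirectory.open(Paths.get("index")); // hypothetical path
                IndexWriter writer = new IndexWriter(dir,
                        new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Store.YES));
            writer.addDocument(doc);
            writer.commit();

            // Open and close a near-real-time reader; on platforms that forbid
            // deleting open files, files it referenced may linger afterwards.
            DirectoryReader reader = DirectoryReader.open(writer);
            reader.close();

            // Explicitly ask the writer to remove index files that are no longer
            // referenced by any commit point or open reader.
            writer.deleteUnusedFiles();
        }
    }
}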
From source file: WriteIndex.java
License: Apache License
/**
 * Indexes every file in the "documents" directory, extracting text and
 * metadata with Tika, then commits and prunes unused index files.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException {
    File docs = new File("documents");
    File indexDir = new File(INDEX_DIRECTORY);
    Directory directory = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    IndexWriter writer = new IndexWriter(directory, conf);
    writer.deleteAll();

    for (File file : docs.listFiles()) {
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();
        Parser parser = new AutoDetectParser();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, handler, metadata, context);
        } catch (TikaException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } finally {
            stream.close();
        }

        String text = handler.toString();
        String fileName = file.getName();

        Document doc = new Document();
        doc.add(new Field("file", fileName, Store.YES, Index.NO));
        for (String key : metadata.names()) {
            String name = key.toLowerCase();
            String value = metadata.get(key);
            if (StringUtils.isBlank(value)) {
                continue;
            }
            if ("keywords".equalsIgnoreCase(key)) {
                for (String keyword : value.split(",?(\\s+)")) {
                    doc.add(new Field(name, keyword, Store.YES, Index.NOT_ANALYZED));
                }
            } else if ("title".equalsIgnoreCase(key)) {
                doc.add(new Field(name, value, Store.YES, Index.ANALYZED));
            } else {
                doc.add(new Field(name, value, Store.YES, Index.NOT_ANALYZED));
            }
        }
        doc.add(new Field("text", text, Store.NO, Index.ANALYZED));
        writer.addDocument(doc);
    }

    writer.commit();
    writer.deleteUnusedFiles();
    System.out.println(writer.maxDoc() + " documents written");
    writer.close();
}
From source file: com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License: Open Source License
private void handleBackup(Operation op, BackupRequest req) throws Throwable {
    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    handleMaintenanceImpl(true);
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    try {
        // Create a snapshot so the index files won't be deleted.
        snapshotter = (SnapshotDeletionPolicy) w.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        String indexDirectory = UriUtils.buildUriPath(getHost().getStorageSandbox().getPath(),
                FILE_PATH_LUCENE);

        // Add the files in the commit to a zip file.
        List<URI> fileList = FileUtils.filesToUris(indexDirectory, commit.getFileNames());
        req.backupFile = FileUtils.zipFiles(fileList,
                this.indexDirectory + "-" + Utils.getNowMicrosUtc());
        op.setBody(req).complete();
    } catch (Exception e) {
        this.logSevere(e);
        throw e;
    } finally {
        if (snapshotter != null) {
            snapshotter.release(commit);
        }
        w.deleteUnusedFiles();
    }
}
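This backup pattern recurs in the Xenon services below: pin a commit with SnapshotDeletionPolicy, copy the pinned files out, release the snapshot, and then call deleteUnusedFiles() so files that were retained only by the snapshot become deletable. A condensed sketch of just that pattern, assuming a writer configured with a SnapshotDeletionPolicy and hypothetical index/backup paths (Lucene 5+ API, not code from the sources above):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SnapshotBackupSketch {
    public static void main(String[] args) throws IOException {
        Path indexPath = Paths.get("index");   // hypothetical index directory
        Path backupPath = Paths.get("backup"); // hypothetical backup destination
        Files.createDirectories(backupPath);

        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        // Wrap the default policy so commits can be pinned while a backup runs.
        conf.setIndexDeletionPolicy(
                new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

        try (Directory dir = FSDirectory.open(indexPath);
                IndexWriter writer = new IndexWriter(dir, conf)) {
            writer.commit(); // ensure there is a commit point to snapshot

            SnapshotDeletionPolicy snapshotter =
                    (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
            IndexCommit commit = snapshotter.snapshot(); // pin the commit
            try {
                // Copy the pinned files; they cannot be deleted while the snapshot is held.
                for (String fileName : commit.getFileNames()) {
                    Files.copy(indexPath.resolve(fileName), backupPath.resolve(fileName),
                            StandardCopyOption.REPLACE_EXISTING);
                }
            } finally {
                snapshotter.release(commit);
                // Files retained only by the snapshot are now deletable.
                writer.deleteUnusedFiles();
            }
        }
    }
}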
From source file: com.vmware.xenon.services.common.LuceneBlobIndexService.java
License: Open Source License
private void handleMaintenanceSafe(Operation post) {
    try {
        IndexWriter w = this.writer;
        if (w == null) {
            post.complete();
            return;
        }
        w.commit();
        setStat(LuceneDocumentIndexService.STAT_NAME_INDEXED_DOCUMENT_COUNT, w.maxDoc());
        File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
        String[] list = directory.list();
        int count = list == null ? 0 : list.length;
        if (count > LuceneDocumentIndexService.getIndexFileCountThresholdForWriterRefresh()) {
            logInfo("Index file count: %d, document count: %d", count, w.maxDoc());
            closeSearcherSafe();
            w.deleteUnusedFiles();
        }
        // Periodically free the buffer. If we are busy serializing requests, they will be
        // ahead of maintenance in the single threaded executor queue, so they will get to
        // re-use the existing allocation.
        this.buffer = null;
        post.complete();
    } catch (Throwable e) {
        logSevere(e);
        post.fail(e);
    }
}
From source file: com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java
License: Open Source License
private void takeSnapshot(Path destinationPath, boolean isZipBackup,
        InternalDocumentIndexInfo indexInfo) throws IOException {
    IndexWriter writer = indexInfo.indexWriter;
    boolean isInMemoryIndex = indexInfo.indexDirectory == null;
    URI storageSandbox = getHost().getStorageSandbox();

    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    long backupStartTime = System.currentTimeMillis();
    try {
        // Create a snapshot so the index files won't be deleted.
        writer.commit();
        snapshotter = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        if (isZipBackup) {
            Path tempDir = null;
            try {
                List<URI> fileList = new ArrayList<>();
                if (isInMemoryIndex) {
                    tempDir = Files.createTempDirectory("lucene-in-memory-backup");
                    copyInMemoryLuceneIndexToDirectory(commit, tempDir);
                    List<URI> files = Files.list(tempDir).map(Path::toUri).collect(toList());
                    fileList.addAll(files);
                } else {
                    Path indexDirectoryPath = Paths.get(storageSandbox)
                            .resolve(indexInfo.indexDirectory);
                    List<URI> files = commit.getFileNames().stream()
                            .map(indexDirectoryPath::resolve).map(Path::toUri).collect(toList());
                    fileList.addAll(files);
                }
                // Add files in the commit to a zip file.
                FileUtils.zipFiles(fileList, destinationPath.toFile());
            } finally {
                if (tempDir != null) {
                    FileUtils.deleteFiles(tempDir.toFile());
                }
            }
        } else {
            // Incremental backup.

            // Create the destination dir if it does not exist.
            if (!Files.exists(destinationPath)) {
                Files.createDirectory(destinationPath);
            }

            Set<String> sourceFileNames = new HashSet<>(commit.getFileNames());

            Set<String> destFileNames = Files.list(destinationPath).filter(Files::isRegularFile)
                    .map(path -> path.getFileName().toString()).collect(toSet());

            Path tempDir = null;
            try {
                Path indexDirectoryPath;
                if (isInMemoryIndex) {
                    // Copy files into a temp directory and point the index directory path at it.
                    tempDir = Files.createTempDirectory("lucene-in-memory-backup");
                    copyInMemoryLuceneIndexToDirectory(commit, tempDir);
                    indexDirectoryPath = tempDir;
                } else {
                    indexDirectoryPath = Paths.get(storageSandbox)
                            .resolve(indexInfo.indexDirectory);
                }

                // Add files that exist in source but not in dest.
                Set<String> toAdd = new HashSet<>(sourceFileNames);
                toAdd.removeAll(destFileNames);
                for (String filename : toAdd) {
                    Path source = indexDirectoryPath.resolve(filename);
                    Path target = destinationPath.resolve(filename);
                    Files.copy(source, target);
                }

                // Delete files that exist in dest but not in source.
                Set<String> toDelete = new HashSet<>(destFileNames);
                toDelete.removeAll(sourceFileNames);
                for (String filename : toDelete) {
                    Path path = destinationPath.resolve(filename);
                    Files.delete(path);
                }

                long backupEndTime = System.currentTimeMillis();
                logInfo("Incremental backup performed. dir=%s, added=%d, deleted=%d, took=%dms",
                        destinationPath, toAdd.size(), toDelete.size(),
                        backupEndTime - backupStartTime);
            } finally {
                if (tempDir != null) {
                    FileUtils.deleteFiles(tempDir.toFile());
                }
            }
        }
    } finally {
        if (snapshotter != null && commit != null) {
            snapshotter.release(commit);
        }
        writer.deleteUnusedFiles();
    }
}
From source file: com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License: Open Source License
private void handleBackup(Operation op, BackupRequest req) throws Throwable {
    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    handleMaintenanceImpl(true);
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    try {
        // Create a snapshot so the index files won't be deleted.
        snapshotter = (SnapshotDeletionPolicy) w.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        String indexDirectory = UriUtils.buildUriPath(getHost().getStorageSandbox().getPath(),
                this.indexDirectory);

        // Add the files in the commit to a zip file.
        List<URI> fileList = FileUtils.filesToUris(indexDirectory, commit.getFileNames());
        req.backupFile = FileUtils.zipFiles(fileList,
                this.indexDirectory + "-" + Utils.getNowMicrosUtc());
        op.setBody(req).complete();
    } catch (Exception e) {
        this.logSevere(e);
        throw e;
    } finally {
        if (snapshotter != null) {
            snapshotter.release(commit);
        }
        w.deleteUnusedFiles();
    }
}
From source file: com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License: Open Source License
private boolean applyIndexSearcherAndFileLimit() {
    File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
    String[] list = directory.list();
    int count = list == null ? 0 : list.length;

    boolean reOpenWriter = count >= INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH;

    int searcherCount = this.searchersPendingClose.size();
    if (searcherCount < INDEX_SEARCHER_COUNT_THRESHOLD && !reOpenWriter) {
        return reOpenWriter;
    }

    // We always close index searchers before re-opening the index writer, otherwise we risk
    // losing pending commits on writer re-open. Notice this code executes if we either have
    // too many index files on disk, thus we need to re-open the writer to consolidate, or
    // when we have too many pending searchers.
    final int acquireReleaseCount = QUERY_THREAD_COUNT + UPDATE_THREAD_COUNT;
    try {
        if (getHost().isStopping()) {
            return false;
        }

        this.writerAvailable.release();
        this.writerAvailable.acquire(acquireReleaseCount);
        this.searcher = null;

        logInfo("Closing %d pending searchers, index file count: %d", searcherCount, count);

        for (IndexSearcher s : this.searchersPendingClose) {
            try {
                s.getIndexReader().close();
            } catch (Throwable e) {
            }
        }
        this.searchersPendingClose.clear();

        IndexWriter w = this.writer;
        if (w != null) {
            try {
                w.deleteUnusedFiles();
            } catch (Throwable e) {
            }
        }
    } catch (InterruptedException e1) {
        logSevere(e1);
    } finally {
        // Release all but one, so we stay owning one reference to the semaphore.
        this.writerAvailable.release(acquireReleaseCount - 1);
    }

    return reOpenWriter;
}
From source file: org.apache.solr.handler.IndexFetcher.java
License: Apache License
/**
 * This command downloads all the necessary files from master to install an index commit point.
 * Only changed files are downloaded. It also downloads the conf files (if they are modified).
 *
 * @param core the SolrCore
 * @param forceReplication force a replication in all cases
 * @return true on success, false if slave is already in sync
 * @throws IOException if an exception occurs
 */
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication)
        throws IOException, InterruptedException {
    successfulInstall = false;
    replicationStartTime = System.currentTimeMillis();
    Directory tmpIndexDir = null;
    String tmpIndex = null;
    Directory indexDir = null;
    String indexDirPath = null;
    boolean deleteTmpIdxDir = true;
    try {
        // Get the current 'replicateable' index version in the master.
        NamedList response = null;
        try {
            response = getLatestVersion();
        } catch (Exception e) {
            LOG.error("Master at: " + masterUrl + " is not available. Index fetch failed. Exception: "
                    + e.getMessage());
            return false;
        }
        long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
        long latestGeneration = (Long) response.get(GENERATION);

        // TODO: make sure that getLatestCommit only returns commit points for the main index
        // (i.e. no side-car indexes)
        IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
        if (commit == null) {
            // Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy
            // hasn't been updated with commit points.
            RefCounted<SolrIndexSearcher> searcherRefCounted = null;
            try {
                searcherRefCounted = core.getNewestSearcher(false);
                if (searcherRefCounted == null) {
                    LOG.warn("No open searcher found - fetch aborted");
                    return false;
                }
                commit = searcherRefCounted.get().getIndexReader().getIndexCommit();
            } finally {
                if (searcherRefCounted != null)
                    searcherRefCounted.decref();
            }
        }

        if (latestVersion == 0L) {
            if (forceReplication && commit.getGeneration() != 0) {
                // Since we won't get the files for an empty index,
                // we just clear ours and commit.
                RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
                try {
                    iw.get().deleteAll();
                } finally {
                    iw.decref();
                }
                SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
                core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
            }
            // There is nothing to be replicated.
            successfulInstall = true;
            return true;
        }

        if (!forceReplication && IndexDeletionPolicyWrapper.getCommitTimestamp(commit) == latestVersion) {
            // Master and slave are already in sync, just return.
            LOG.info("Slave in sync with master.");
            successfulInstall = true;
            return true;
        }
        LOG.info("Master's generation: " + latestGeneration);
        LOG.info("Slave's generation: " + commit.getGeneration());
        LOG.info("Starting replication process");
        // Get the list of files first.
        fetchFileList(latestGeneration);
        // This can happen if the commit point is deleted before we fetch the file list.
        if (filesToDownload.isEmpty())
            return false;
        LOG.info("Number of files in latest index in master: " + filesToDownload.size());

        // Create the sync service.
        fsyncService = Executors.newSingleThreadExecutor(new DefaultSolrThreadFactory("fsyncService"));
        // Use a synchronized list because the list is read by other threads (to show details).
        filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
        // If the generation of the master is older than that of the slave, the indexes are not
        // compatible to be copied over; a new index directory is created and all the files
        // need to be copied.
        boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion
                || commit.getGeneration() >= latestGeneration || forceReplication;

        String tmpIdxDirName = "index."
                + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
        tmpIndex = createTempindexDir(core, tmpIdxDirName);

        tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT,
                core.getSolrConfig().indexConfig.lockType);

        // The current index dir...
        indexDirPath = core.getIndexDir();
        indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT,
                core.getSolrConfig().indexConfig.lockType);

        try {
            if (isIndexStale(indexDir)) {
                isFullCopyNeeded = true;
            }

            if (!isFullCopyNeeded) {
                // A searcher might be using some flushed but not committed segments
                // because of soft commits (which open a searcher on the IW's data),
                // so we need to close the existing searcher on the last commit
                // and wait until we are able to clean up all unused lucene files.
                if (solrCore.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
                    solrCore.closeSearcher();
                }

                // Rollback and reopen the index writer and wait until all unused files
                // are successfully deleted.
                solrCore.getUpdateHandler().newIndexWriter(true);
                RefCounted<IndexWriter> writer = solrCore.getUpdateHandler().getSolrCoreState()
                        .getIndexWriter(null);
                try {
                    IndexWriter indexWriter = writer.get();
                    int c = 0;
                    indexWriter.deleteUnusedFiles();
                    while (hasUnusedFiles(indexDir, commit)) {
                        indexWriter.deleteUnusedFiles();
                        LOG.info("Sleeping for 1000ms to wait for unused lucene index files to be delete-able");
                        Thread.sleep(1000);
                        c++;
                        if (c >= 30) {
                            LOG.warn("IndexFetcher unable to cleanup unused lucene index files so we must do a full copy instead");
                            isFullCopyNeeded = true;
                            break;
                        }
                    }
                    if (c > 0) {
                        LOG.info("IndexFetcher slept for " + (c * 1000)
                                + "ms for unused lucene index files to be delete-able");
                    }
                } finally {
                    writer.decref();
                }
                solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
            }
            boolean reloadCore = false;
            try {
                LOG.info("Starting download to " + tmpIndexDir + " fullCopy=" + isFullCopyNeeded);
                successfulInstall = false;

                downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
                LOG.info("Total time taken for download : "
                        + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
                Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
                if (!modifiedConfFiles.isEmpty()) {
                    downloadConfFiles(confFilesToDownload, latestGeneration);
                    if (isFullCopyNeeded) {
                        successfulInstall = modifyIndexProps(tmpIdxDirName);
                        deleteTmpIdxDir = false;
                    } else {
                        successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
                    }
                    if (successfulInstall) {
                        if (isFullCopyNeeded) {
                            // Let the system know we are changing dir's and the old one
                            // may be closed.
                            if (indexDir != null) {
                                LOG.info("removing old index directory " + indexDir);
                                core.getDirectoryFactory().doneWithDirectory(indexDir);
                                core.getDirectoryFactory().remove(indexDir);
                            }
                        }
                        LOG.info("Configuration files are modified, core will be reloaded");
                        // Write the replication time and conf files to a file.
                        logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall);
                        reloadCore = true;
                    }
                } else {
                    terminateAndWaitFsyncService();
                    if (isFullCopyNeeded) {
                        successfulInstall = modifyIndexProps(tmpIdxDirName);
                        deleteTmpIdxDir = false;
                    } else {
                        successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
                    }
                    if (successfulInstall) {
                        logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall);
                    }
                }
            } finally {
                if (!isFullCopyNeeded) {
                    solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core);
                }
            }

            // We must reload the core after we open the IW back up.
            if (reloadCore) {
                reloadCore();
            }

            if (successfulInstall) {
                if (isFullCopyNeeded) {
                    // Let the system know we are changing dir's and the old one
                    // may be closed.
                    if (indexDir != null) {
                        LOG.info("removing old index directory " + indexDir);
                        core.getDirectoryFactory().doneWithDirectory(indexDir);
                        core.getDirectoryFactory().remove(indexDir);
                    }
                }
                if (isFullCopyNeeded) {
                    solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded);
                }

                openNewSearcherAndUpdateCommitPoint();
            }

            replicationStartTime = 0;
            return successfulInstall;
        } catch (ReplicationHandlerException e) {
            LOG.error("User aborted Replication");
            return false;
        } catch (SolrException e) {
            throw e;
        } catch (InterruptedException e) {
            throw new InterruptedException("Index fetch interrupted");
        } catch (Exception e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
        }
    } finally {
        try {
            if (!successfulInstall) {
                try {
                    logReplicationTimeAndConfFiles(null, successfulInstall);
                } catch (Exception e) {
                    LOG.error("caught", e);
                }
            }
            filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null;
            replicationStartTime = 0;
            dirFileFetcher = null;
            localFileFetcher = null;
            if (fsyncService != null && !fsyncService.isShutdown())
                fsyncService.shutdownNow();
            fsyncService = null;
            stop = false;
            fsyncException = null;
        } finally {
            if (deleteTmpIdxDir && tmpIndexDir != null) {
                try {
                    core.getDirectoryFactory().doneWithDirectory(tmpIndexDir);
                    core.getDirectoryFactory().remove(tmpIndexDir);
                } catch (IOException e) {
                    SolrException.log(LOG, "Error removing directory " + tmpIndexDir, e);
                }
            }
            if (tmpIndexDir != null) {
                core.getDirectoryFactory().release(tmpIndexDir);
            }
            if (indexDir != null) {
                core.getDirectoryFactory().release(indexDir);
            }
        }
    }
}