Example usage for org.apache.lucene.index IndexWriter deleteUnusedFiles

Introduction

This page collects usage examples of org.apache.lucene.index.IndexWriter#deleteUnusedFiles from open-source projects.

Prototype

public synchronized void deleteUnusedFiles() throws IOException 

Document

Expert: remove any index files that are no longer used.
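
A minimal sketch of typical usage, written against the Lucene 3.x API that the examples below use (the class name, index path, and document contents here are hypothetical):

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class DeleteUnusedFilesSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical index location; adjust to your environment.
        Directory directory = FSDirectory.open(new File("/tmp/example-index"));
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35,
                new StandardAnalyzer(Version.LUCENE_35));
        IndexWriter writer = new IndexWriter(directory, conf);
        try {
            Document doc = new Document();
            doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);

            // Make the change durable, then ask the writer's deletion policy to
            // remove index files no longer referenced by any commit point.
            writer.commit();
            writer.deleteUnusedFiles();
        } finally {
            writer.close();
        }
    }
}

IndexWriter normally deletes unused files on its own during indexing; an explicit call is mainly useful once readers that were keeping old files alive have been closed (for example on Windows, where open files cannot be deleted).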

Usage

From source file: WriteIndex.java

License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws IOException {

    File docs = new File("documents");
    File indexDir = new File(INDEX_DIRECTORY);

    Directory directory = FSDirectory.open(indexDir);

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
    IndexWriter writer = new IndexWriter(directory, conf);
    writer.deleteAll();

    for (File file : docs.listFiles()) {
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();
        Parser parser = new AutoDetectParser();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, handler, metadata, context);
        } catch (TikaException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } finally {
            stream.close();
        }

        String text = handler.toString();
        String fileName = file.getName();

        Document doc = new Document();
        doc.add(new Field("file", fileName, Store.YES, Index.NO));

        for (String key : metadata.names()) {
            String name = key.toLowerCase();
            String value = metadata.get(key);

            if (StringUtils.isBlank(value)) {
                continue;
            }

            if ("keywords".equalsIgnoreCase(key)) {
                for (String keyword : value.split(",?(\\s+)")) {
                    doc.add(new Field(name, keyword, Store.YES, Index.NOT_ANALYZED));
                }
            } else if ("title".equalsIgnoreCase(key)) {
                doc.add(new Field(name, value, Store.YES, Index.ANALYZED));
            } else {
                doc.add(new Field(name, value, Store.YES, Index.NOT_ANALYZED));
            }
        }
        doc.add(new Field("text", text, Store.NO, Index.ANALYZED));
        writer.addDocument(doc);

    }

    writer.commit();
    writer.deleteUnusedFiles();

    System.out.println(writer.maxDoc() + " documents written");

    writer.close();
}

From source file: com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License: Open Source License

private void handleBackup(Operation op, BackupRequest req) throws Throwable {
    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    handleMaintenanceImpl(true);
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    try {
        // Create a snapshot so the index files won't be deleted.
        snapshotter = (SnapshotDeletionPolicy) w.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        String indexDirectory = UriUtils.buildUriPath(getHost().getStorageSandbox().getPath(),
                FILE_PATH_LUCENE);

        // Add the files in the commit to a zip file.
        List<URI> fileList = FileUtils.filesToUris(indexDirectory, commit.getFileNames());
        req.backupFile = FileUtils.zipFiles(fileList, this.indexDirectory + "-" + Utils.getNowMicrosUtc());

        op.setBody(req).complete();
    } catch (Exception e) {
        this.logSevere(e);
        throw e;
    } finally {
        if (snapshotter != null) {
            snapshotter.release(commit);
        }
        w.deleteUnusedFiles();
    }
}

From source file: com.vmware.xenon.services.common.LuceneBlobIndexService.java

License: Open Source License

private void handleMaintenanceSafe(Operation post) {
    try {
        IndexWriter w = this.writer;
        if (w == null) {
            post.complete();
            return;
        }
        w.commit();

        setStat(LuceneDocumentIndexService.STAT_NAME_INDEXED_DOCUMENT_COUNT, w.maxDoc());
        File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
        String[] list = directory.list();
        int count = list == null ? 0 : list.length;
        if (count > LuceneDocumentIndexService.getIndexFileCountThresholdForWriterRefresh()) {
            logInfo("Index file count: %d, document count: %d", count, w.maxDoc());
            closeSearcherSafe();
            w.deleteUnusedFiles();
        }

        // Periodically free the buffer. If we are busy serializing requests, they will be ahead of
        // maintenance in the single threaded executor queue, so they will get to re-use the existing
        // allocation
        this.buffer = null;
        post.complete();
    } catch (Throwable e) {
        logSevere(e);
        post.fail(e);
    }
}

From source file: com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java

License: Open Source License

private void takeSnapshot(Path destinationPath, boolean isZipBackup, InternalDocumentIndexInfo indexInfo)
        throws IOException {

    IndexWriter writer = indexInfo.indexWriter;
    boolean isInMemoryIndex = indexInfo.indexDirectory == null;

    URI storageSandbox = getHost().getStorageSandbox();

    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    long backupStartTime = System.currentTimeMillis();
    try {
        // Create a snapshot so the index files won't be deleted.
        writer.commit();
        snapshotter = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        if (isZipBackup) {
            Path tempDir = null;
            try {
                List<URI> fileList = new ArrayList<>();
                if (isInMemoryIndex) {
                    tempDir = Files.createTempDirectory("lucene-in-memory-backup");
                    copyInMemoryLuceneIndexToDirectory(commit, tempDir);
                    List<URI> files = Files.list(tempDir).map(Path::toUri).collect(toList());
                    fileList.addAll(files);
                } else {

                    Path indexDirectoryPath = Paths.get(storageSandbox).resolve(indexInfo.indexDirectory);
                    List<URI> files = commit.getFileNames().stream().map(indexDirectoryPath::resolve)
                            .map(Path::toUri).collect(toList());
                    fileList.addAll(files);
                }

                // Add files in the commit to a zip file.
                FileUtils.zipFiles(fileList, destinationPath.toFile());
            } finally {
                if (tempDir != null) {
                    FileUtils.deleteFiles(tempDir.toFile());
                }
            }
        } else {
            // incremental backup

            // create the destination dir if it does not exist
            if (!Files.exists(destinationPath)) {
                Files.createDirectory(destinationPath);
            }

            Set<String> sourceFileNames = new HashSet<>(commit.getFileNames());

            Set<String> destFileNames = Files.list(destinationPath).filter(Files::isRegularFile)
                    .map(path -> path.getFileName().toString()).collect(toSet());

            Path tempDir = null;
            try {
                Path indexDirectoryPath;
                if (isInMemoryIndex) {
                    // copy files into temp directory and point index directory path to temp dir
                    tempDir = Files.createTempDirectory("lucene-in-memory-backup");
                    copyInMemoryLuceneIndexToDirectory(commit, tempDir);
                    indexDirectoryPath = tempDir;
                } else {
                    indexDirectoryPath = Paths.get(storageSandbox).resolve(indexInfo.indexDirectory);
                }

                // add files that exist in source but not in dest
                Set<String> toAdd = new HashSet<>(sourceFileNames);
                toAdd.removeAll(destFileNames);
                for (String filename : toAdd) {
                    Path source = indexDirectoryPath.resolve(filename);
                    Path target = destinationPath.resolve(filename);
                    Files.copy(source, target);
                }

                // delete files that exist in dest but not in source
                Set<String> toDelete = new HashSet<>(destFileNames);
                toDelete.removeAll(sourceFileNames);
                for (String filename : toDelete) {
                    Path path = destinationPath.resolve(filename);
                    Files.delete(path);
                }

                long backupEndTime = System.currentTimeMillis();
                logInfo("Incremental backup performed. dir=%s, added=%d, deleted=%d, took=%dms",
                        destinationPath, toAdd.size(), toDelete.size(), backupEndTime - backupStartTime);
            } finally {
                if (tempDir != null) {
                    FileUtils.deleteFiles(tempDir.toFile());
                }
            }
        }
    } finally {
        if (snapshotter != null && commit != null) {
            snapshotter.release(commit);
        }
        writer.deleteUnusedFiles();
    }
}

From source file: com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License: Open Source License

private void handleBackup(Operation op, BackupRequest req) throws Throwable {
    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    handleMaintenanceImpl(true);
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    try {
        // Create a snapshot so the index files won't be deleted.
        snapshotter = (SnapshotDeletionPolicy) w.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        String indexDirectory = UriUtils.buildUriPath(getHost().getStorageSandbox().getPath(),
                this.indexDirectory);

        // Add the files in the commit to a zip file.
        List<URI> fileList = FileUtils.filesToUris(indexDirectory, commit.getFileNames());
        req.backupFile = FileUtils.zipFiles(fileList, this.indexDirectory + "-" + Utils.getNowMicrosUtc());

        op.setBody(req).complete();
    } catch (Exception e) {
        this.logSevere(e);
        throw e;
    } finally {
        if (snapshotter != null) {
            snapshotter.release(commit);
        }
        w.deleteUnusedFiles();
    }
}

From source file: com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License: Open Source License

private boolean applyIndexSearcherAndFileLimit() {
    File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
    String[] list = directory.list();
    int count = list == null ? 0 : list.length;

    boolean reOpenWriter = count >= INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH;

    int searcherCount = this.searchersPendingClose.size();
    if (searcherCount < INDEX_SEARCHER_COUNT_THRESHOLD && !reOpenWriter) {
        return reOpenWriter;
    }

    // We always close index searchers before re-opening the index writer; otherwise we risk
    // losing pending commits on writer re-open. Note that this code executes either when we
    // have too many index files on disk (so the writer must be re-opened to consolidate them)
    // or when we have too many pending searchers.
    final int acquireReleaseCount = QUERY_THREAD_COUNT + UPDATE_THREAD_COUNT;
    try {
        if (getHost().isStopping()) {
            return false;
        }

        this.writerAvailable.release();
        this.writerAvailable.acquire(acquireReleaseCount);
        this.searcher = null;

        logInfo("Closing %d pending searchers, index file count: %d", searcherCount, count);

        for (IndexSearcher s : this.searchersPendingClose) {
            try {
                s.getIndexReader().close();
            } catch (Throwable e) {
            }
        }
        this.searchersPendingClose.clear();

        IndexWriter w = this.writer;
        if (w != null) {
            try {
                w.deleteUnusedFiles();
            } catch (Throwable e) {
            }
        }

    } catch (InterruptedException e1) {
        logSevere(e1);
    } finally {
        // release all but one permit, so we keep holding one reference to the semaphore
        this.writerAvailable.release(acquireReleaseCount - 1);
    }

    return reOpenWriter;
}

From source file: org.apache.solr.handler.IndexFetcher.java

License: Apache License

/**
 * This command downloads all the necessary files from the master to install an index commit point.
 * Only changed files are downloaded. It also downloads the conf files (if they are modified).
 *
 * @param core the SolrCore
 * @param forceReplication force a replication in all cases
 * @return true on success, false if the slave is already in sync
 * @throws IOException if an exception occurs
 */
boolean fetchLatestIndex(final SolrCore core, boolean forceReplication)
        throws IOException, InterruptedException {
    successfulInstall = false;
    replicationStartTime = System.currentTimeMillis();
    Directory tmpIndexDir = null;
    String tmpIndex = null;
    Directory indexDir = null;
    String indexDirPath = null;
    boolean deleteTmpIdxDir = true;
    try {
        //get the current 'replicateable' index version in the master
        NamedList response = null;
        try {
            response = getLatestVersion();
        } catch (Exception e) {
            LOG.error("Master at: " + masterUrl + " is not available. Index fetch failed. Exception: "
                    + e.getMessage());
            return false;
        }
        long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
        long latestGeneration = (Long) response.get(GENERATION);

        // TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
        IndexCommit commit = core.getDeletionPolicy().getLatestCommit();
        if (commit == null) {
            // Presumably the IndexWriter hasn't been opened yet, and hence the deletion policy hasn't been updated with commit points
            RefCounted<SolrIndexSearcher> searcherRefCounted = null;
            try {
                searcherRefCounted = core.getNewestSearcher(false);
                if (searcherRefCounted == null) {
                    LOG.warn("No open searcher found - fetch aborted");
                    return false;
                }
                commit = searcherRefCounted.get().getIndexReader().getIndexCommit();
            } finally {
                if (searcherRefCounted != null)
                    searcherRefCounted.decref();
            }
        }

        if (latestVersion == 0L) {
            if (forceReplication && commit.getGeneration() != 0) {
                // since we won't get the files for an empty index,
                // we just clear ours and commit
                RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
                try {
                    iw.get().deleteAll();
                } finally {
                    iw.decref();
                }
                SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
                core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
            }

            //there is nothing to be replicated
            successfulInstall = true;
            return true;
        }

        if (!forceReplication && IndexDeletionPolicyWrapper.getCommitTimestamp(commit) == latestVersion) {
            // master and slave are already in sync; just return
            LOG.info("Slave in sync with master.");
            successfulInstall = true;
            return true;
        }
        LOG.info("Master's generation: " + latestGeneration);
        LOG.info("Slave's generation: " + commit.getGeneration());
        LOG.info("Starting replication process");
        // get the list of files first
        fetchFileList(latestGeneration);
        // this can happen if the commit point is deleted before we fetch the file list.
        if (filesToDownload.isEmpty())
            return false;
        LOG.info("Number of files in latest index in master: " + filesToDownload.size());

        // Create the sync service
        fsyncService = Executors.newSingleThreadExecutor(new DefaultSolrThreadFactory("fsyncService"));
        // use a synchronized list because the list is read by other threads (to show details)
        filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
        // if the master's generation is older than the slave's, the indexes are not compatible
        // for an incremental copy; a new index directory must be created and all files copied
        boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion
                || commit.getGeneration() >= latestGeneration || forceReplication;

        String tmpIdxDirName = "index."
                + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
        tmpIndex = createTempindexDir(core, tmpIdxDirName);

        tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT,
                core.getSolrConfig().indexConfig.lockType);

        // current index dir...
        indexDirPath = core.getIndexDir();
        indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT,
                core.getSolrConfig().indexConfig.lockType);

        try {

            if (isIndexStale(indexDir)) {
                isFullCopyNeeded = true;
            }

            if (!isFullCopyNeeded) {
                // a searcher might be using some flushed but not committed segments
                // because of soft commits (which open a searcher on IW's data)
                // so we need to close the existing searcher on the last commit
                // and wait until we are able to clean up all unused lucene files
                if (solrCore.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
                    solrCore.closeSearcher();
                }

                // rollback and reopen index writer and wait until all unused files
                // are successfully deleted
                solrCore.getUpdateHandler().newIndexWriter(true);
                RefCounted<IndexWriter> writer = solrCore.getUpdateHandler().getSolrCoreState()
                        .getIndexWriter(null);
                try {
                    IndexWriter indexWriter = writer.get();
                    int c = 0;
                    indexWriter.deleteUnusedFiles();
                    while (hasUnusedFiles(indexDir, commit)) {
                        indexWriter.deleteUnusedFiles();
                        LOG.info("Sleeping for 1000ms to wait for unused lucene index files to be delete-able");
                        Thread.sleep(1000);
                        c++;
                        if (c >= 30) {
                            LOG.warn(
                                    "IndexFetcher unable to cleanup unused lucene index files so we must do a full copy instead");
                            isFullCopyNeeded = true;
                            break;
                        }
                    }
                    if (c > 0) {
                        LOG.info("IndexFetcher slept for " + (c * 1000)
                                + "ms for unused lucene index files to be delete-able");
                    }
                } finally {
                    writer.decref();
                }
                solrCore.getUpdateHandler().getSolrCoreState().closeIndexWriter(core, true);
            }
            boolean reloadCore = false;

            try {
                LOG.info("Starting download to " + tmpIndexDir + " fullCopy=" + isFullCopyNeeded);
                successfulInstall = false;

                downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
                LOG.info("Total time taken for download : "
                        + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
                Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
                if (!modifiedConfFiles.isEmpty()) {
                    downloadConfFiles(confFilesToDownload, latestGeneration);
                    if (isFullCopyNeeded) {
                        successfulInstall = modifyIndexProps(tmpIdxDirName);
                        deleteTmpIdxDir = false;
                    } else {
                        successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
                    }
                    if (successfulInstall) {
                        if (isFullCopyNeeded) {
                            // let the system know we are changing dirs and the old one
                            // may be closed
                            if (indexDir != null) {
                                LOG.info("removing old index directory " + indexDir);
                                core.getDirectoryFactory().doneWithDirectory(indexDir);
                                core.getDirectoryFactory().remove(indexDir);
                            }
                        }

                        LOG.info("Configuration files are modified, core will be reloaded");
                        logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall); // record the time of
                                                                                              // replication and the conf files
                        reloadCore = true;
                    }
                } else {
                    terminateAndWaitFsyncService();
                    if (isFullCopyNeeded) {
                        successfulInstall = modifyIndexProps(tmpIdxDirName);
                        deleteTmpIdxDir = false;
                    } else {
                        successfulInstall = moveIndexFiles(tmpIndexDir, indexDir);
                    }
                    if (successfulInstall) {
                        logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall);
                    }
                }
            } finally {
                if (!isFullCopyNeeded) {
                    solrCore.getUpdateHandler().getSolrCoreState().openIndexWriter(core);
                }
            }

            // we must reload the core after we open the IW back up
            if (reloadCore) {
                reloadCore();
            }

            if (successfulInstall) {
                if (isFullCopyNeeded) {
                    // let the system know we are changing dirs and the old one
                    // may be closed
                    if (indexDir != null) {
                        LOG.info("removing old index directory " + indexDir);
                        core.getDirectoryFactory().doneWithDirectory(indexDir);
                        core.getDirectoryFactory().remove(indexDir);
                    }
                }
                if (isFullCopyNeeded) {
                    solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded);
                }

                openNewSearcherAndUpdateCommitPoint();
            }

            replicationStartTime = 0;
            return successfulInstall;
        } catch (ReplicationHandlerException e) {
            LOG.error("User aborted Replication");
            return false;
        } catch (SolrException e) {
            throw e;
        } catch (InterruptedException e) {
            throw new InterruptedException("Index fetch interrupted");
        } catch (Exception e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
        }
    } finally {
        try {
            if (!successfulInstall) {
                try {
                    logReplicationTimeAndConfFiles(null, successfulInstall);
                } catch (Exception e) {
                    LOG.error("caught", e);
                }
            }
            filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null;
            replicationStartTime = 0;
            dirFileFetcher = null;
            localFileFetcher = null;
            if (fsyncService != null && !fsyncService.isShutdown())
                fsyncService.shutdownNow();
            fsyncService = null;
            stop = false;
            fsyncException = null;
        } finally {
            if (deleteTmpIdxDir && tmpIndexDir != null) {
                try {
                    core.getDirectoryFactory().doneWithDirectory(tmpIndexDir);
                    core.getDirectoryFactory().remove(tmpIndexDir);
                } catch (IOException e) {
                    SolrException.log(LOG, "Error removing directory " + tmpIndexDir, e);
                }
            }

            if (tmpIndexDir != null) {
                core.getDirectoryFactory().release(tmpIndexDir);
            }

            if (indexDir != null) {
                core.getDirectoryFactory().release(indexDir);
            }
        }
    }
}