Example usage for org.apache.lucene.index IndexWriterConfig setIndexDeletionPolicy

List of usage examples for org.apache.lucene.index IndexWriterConfig setIndexDeletionPolicy

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setIndexDeletionPolicy.

Prototype

public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy) 

Source Link

Document

Expert: allows an optional IndexDeletionPolicy implementation to be specified.

Usage

From source file:cn.hbu.cs.esearch.index.DiskSearchIndex.java

License:Apache License

/**
 * Returns the cached index writer, creating and caching a new one when none is open yet.
 *
 * @param analyzer analyzer passed to the writer configuration
 * @param similarity similarity installed on the configuration; skipped when {@code null}
 * @return the already cached writer, or the writer created by this call
 * @throws IOException propagated from obtaining the directory or constructing the writer
 */
@Override
public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException {
    // Reuse the writer from an earlier call if there is one.
    if (_indexWriter != null) {
        return _indexWriter;
    }

    Directory indexDir = _dirMgr.getDirectory(true);
    log.info("opening index writer at: " + _dirMgr.getPath());

    EsearchMergePolicy esearchMergePolicy = new EsearchMergePolicy();
    esearchMergePolicy.setMergePolicyParams(_mergePolicyParams);

    // Original author's note (hao): autocommit is set to false with this constructor.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

    _deletionPolicy = new ZoieIndexDeletionPolicy();
    writerConfig.setIndexDeletionPolicy(_deletionPolicy);

    writerConfig.setRAMBufferSizeMB(5);
    writerConfig.setReaderPooling(false);
    writerConfig.setMergePolicy(esearchMergePolicy);
    writerConfig.setMergeScheduler(_mergeScheduler);
    if (similarity != null) {
        writerConfig.setSimilarity(similarity);
    }

    IndexWriter newWriter = new IndexWriter(indexDir, writerConfig);

    // The policy is re-read from the writer's own configuration because, per the original
    // comment, the writer holds a deep clone of the policy instance configured above.
    _deletionPolicy = (ZoieIndexDeletionPolicy) (newWriter.getConfig().getIndexDeletionPolicy());
    _indexWriter = newWriter;
    return newWriter;
}

From source file:com.b2international.index.compat.SingleDirectoryIndexImpl.java

License:Apache License

/**
 * Opens the directory, the index writer and the searcher manager for this index.
 *
 * @param indexDirectory file-system location of the index
 * @param clean when {@code true} the writer is opened in {@code CREATE} mode, otherwise in
 *        {@code CREATE_OR_APPEND} mode
 * @throws RuntimeException wrapping any {@link IOException} raised while opening
 */
protected void initLucene(final File indexDirectory, final boolean clean) {
    try {
        this.directory = Directories.openFile(indexDirectory.toPath());

        final IndexWriterConfig writerConfig = new IndexWriterConfig(new ComponentTermAnalyzer());
        if (clean) {
            writerConfig.setOpenMode(OpenMode.CREATE);
        } else {
            writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Wrap whatever policy the configuration currently carries in a snapshot policy.
        writerConfig.setIndexDeletionPolicy(
                new SnapshotDeletionPolicy(writerConfig.getIndexDeletionPolicy()));

        this.writer = new IndexWriter(directory, writerConfig);
        // Commit right away so the index exists on disk before the searcher manager opens it.
        this.writer.commit();

        this.manager = new SearcherManager(directory, new SearchWarmerFactory());
    } catch (final IOException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}

From source file:com.mathworks.xzheng.admin.Fragments.java

License:Apache License

/**
 * Code fragment showing how to take a snapshot of the latest commit through a
 * {@code SnapshotDeletionPolicy} installed on the writer configuration.
 *
 * <p>{@code dir} and {@code analyzer} are {@code null} placeholders in this fragment; a real
 * caller has to supply actual instances.
 *
 * @throws Exception propagated from the writer or the snapshot calls
 */
public void test() throws Exception {
    Directory dir = null;
    Analyzer analyzer = null;
    // START
    IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy();
    SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(policy);

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setIndexDeletionPolicy(snapshotter);
    IndexWriter writer = new IndexWriter(dir, config);
    // END

    IndexCommit commit = null;
    try {
        commit = (IndexCommit) snapshotter.snapshot();
        Collection<String> fileNames = commit.getFileNames();
        /*<iterate over & copy files from fileNames>*/
    } finally {
        // snapshot() may throw before commit is assigned. Releasing a null commit would then
        // raise a second exception from the finally block and hide the original failure.
        if (commit != null) {
            snapshotter.release(commit);
        }
    }
}

From source file:com.qwazr.search.bench.LuceneCommonIndex.java

License:Apache License

/**
 * Creates the schema and index directories under {@code rootDirectory}, then opens an index
 * writer on the {@code data} sub-directory of the index directory.
 *
 * @param rootDirectory parent directory in which the schema directory is created
 * @param schemaName name of the schema directory to create
 * @param indexName name of the index directory to create inside the schema directory
 * @param ramBufferSize RAM buffer size, in MB, given to the writer configuration
 * @param useCompoundFile compound-file flag given to the writer configuration
 * @throws IOException propagated from directory creation or from opening the index
 */
LuceneCommonIndex(final Path rootDirectory, final String schemaName, final String indexName,
        final double ramBufferSize, final boolean useCompoundFile) throws IOException {

    // Files.createDirectory fails if the directory already exists, so both must be new.
    final Path schemaPath = rootDirectory.resolve(schemaName);
    final Path schemaDirectory = Files.createDirectory(schemaPath);
    final Path indexPath = schemaDirectory.resolve(indexName);
    this.indexDirectory = Files.createDirectory(indexPath);
    this.luceneDirectory = indexDirectory.resolve("data");
    this.dataDirectory = FSDirectory.open(luceneDirectory);

    final IndexWriterConfig writerConfig = new IndexWriterConfig(
            new PerFieldAnalyzerWrapper(new StandardAnalyzer()));
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writerConfig.setRAMBufferSizeMB(ramBufferSize);
    writerConfig.setUseCompoundFile(useCompoundFile);

    // Merging: concurrent scheduler capped at MAX_SSD_MERGE_THREADS, tiered merge policy.
    final ConcurrentMergeScheduler scheduler = new ConcurrentMergeScheduler();
    scheduler.setMaxMergesAndThreads(MAX_SSD_MERGE_THREADS, MAX_SSD_MERGE_THREADS);
    writerConfig.setMergeScheduler(scheduler);
    writerConfig.setMergePolicy(new TieredMergePolicy());

    // Commits are managed by a snapshot policy wrapping the configuration's current policy.
    writerConfig.setIndexDeletionPolicy(
            new SnapshotDeletionPolicy(writerConfig.getIndexDeletionPolicy()));

    this.indexWriter = new IndexWriter(this.dataDirectory, writerConfig);
    this.localReplicator = new LocalReplicator();
}

From source file:com.qwazr.search.index.IndexInstance.java

License:Apache License

/**
 * @param schema/*from   ww w  . j  a  v a2  s . c  om*/
 * @param indexDirectory
 * @return
 */
final static IndexInstance newInstance(SchemaInstance schema, File indexDirectory,
        IndexSettingsDefinition settings)
        throws ServerException, IOException, ReflectiveOperationException, InterruptedException {
    UpdatableAnalyzer indexAnalyzer = null;
    UpdatableAnalyzer queryAnalyzer = null;
    IndexWriter indexWriter = null;
    Directory dataDirectory = null;
    try {

        if (!indexDirectory.exists())
            indexDirectory.mkdir();
        if (!indexDirectory.isDirectory())
            throw new IOException(
                    "This name is not valid. No directory exists for this location: " + indexDirectory);

        FileSet fileSet = new FileSet(indexDirectory);

        //Loading the settings
        if (settings == null) {
            settings = fileSet.settingsFile.exists()
                    ? JsonMapper.MAPPER.readValue(fileSet.settingsFile, IndexSettingsDefinition.class)
                    : IndexSettingsDefinition.EMPTY;
        } else {
            JsonMapper.MAPPER.writeValue(fileSet.settingsFile, settings);
        }

        //Loading the fields
        File fieldMapFile = new File(indexDirectory, FIELDS_FILE);
        LinkedHashMap<String, FieldDefinition> fieldMap = fieldMapFile.exists()
                ? JsonMapper.MAPPER.readValue(fieldMapFile, FieldDefinition.MapStringFieldTypeRef)
                : new LinkedHashMap<>();

        //Loading the fields
        File analyzerMapFile = new File(indexDirectory, ANALYZERS_FILE);
        LinkedHashMap<String, AnalyzerDefinition> analyzerMap = analyzerMapFile.exists()
                ? JsonMapper.MAPPER.readValue(analyzerMapFile, AnalyzerDefinition.MapStringAnalyzerTypeRef)
                : new LinkedHashMap<>();

        AnalyzerContext context = new AnalyzerContext(analyzerMap, fieldMap);
        indexAnalyzer = new UpdatableAnalyzer(context, context.indexAnalyzerMap);
        queryAnalyzer = new UpdatableAnalyzer(context, context.queryAnalyzerMap);

        // Open and lock the data directory
        dataDirectory = FSDirectory.open(fileSet.dataDirectory.toPath());

        // Set
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(indexAnalyzer);
        if (settings != null && settings.similarity_class != null)
            indexWriterConfig.setSimilarity(IndexUtils.findSimilarity(settings.similarity_class));
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy(
                indexWriterConfig.getIndexDeletionPolicy());
        indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy);
        indexWriter = new IndexWriter(dataDirectory, indexWriterConfig);
        if (indexWriter.hasUncommittedChanges())
            indexWriter.commit();

        // Finally we build the SearchSearcherManger
        SearcherManager searcherManager = new SearcherManager(indexWriter, true, null);

        return new IndexInstance(schema, dataDirectory, settings, analyzerMap, fieldMap, fileSet, indexWriter,
                searcherManager, queryAnalyzer);
    } catch (IOException | ServerException | ReflectiveOperationException | InterruptedException e) {
        // We failed in opening the index. We close everything we can
        if (queryAnalyzer != null)
            IOUtils.closeQuietly(queryAnalyzer);
        if (indexAnalyzer != null)
            IOUtils.closeQuietly(indexAnalyzer);
        if (indexWriter != null)
            IOUtils.closeQuietly(indexWriter);
        if (dataDirectory != null)
            IOUtils.closeQuietly(dataDirectory);
        throw e;
    }
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Opens an index writer on {@code directory}, stores it in {@code this.writer} and returns it.
 *
 * @param directory file-system location of the index
 * @param doUpgrade when {@code true} and an index already exists there, {@code upgradeIndex}
 *        is called on it before the writer is opened
 * @return the writer that was just created
 * @throws Exception propagated from opening, upgrading or committing the index
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Directory luceneDir = MMapDirectory.open(directory.toPath());
    Analyzer simpleAnalyzer = new SimpleAnalyzer();

    // Upgrade the index in place if necessary.
    if (doUpgrade) {
        if (DirectoryReader.indexExists(luceneDir)) {
            upgradeIndex(luceneDir);
        }
    }

    IndexWriterConfig writerConfig = new IndexWriterConfig(simpleAnalyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writerConfig.setIndexDeletionPolicy(
            new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    Long memoryLimitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (memoryLimitMB != null) {
        // Half of the limit (at least 1 MB) goes to the index buffer; the other half is kept
        // for service caching context.
        long indexBufferMB = Math.max(1, memoryLimitMB / 2);
        writerConfig.setRAMBufferSizeMB(indexBufferMB);
    }

    this.writer = new IndexWriter(luceneDir, writerConfig);
    this.writer.commit();

    // Creation time and last-update time start out identical.
    this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
    this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    return this.writer;
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Opens an index writer on {@code directory}, publishes it in {@code this.writer} while
 * holding {@code searchSync}, and returns {@code this.writer}.
 *
 * @param directory file-system location of the index
 * @param doUpgrade when {@code true} and an index already exists there, {@code upgradeIndex}
 *        is called on it before the writer is opened
 * @return the value of {@code this.writer} after publication
 * @throws Exception propagated from opening, upgrading or committing the index
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    IndexWriterConfig writerConfig = new IndexWriterConfig(new SimpleAnalyzer());

    Long memoryLimitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (memoryLimitMB != null) {
        // Three quarters of the limit (at least 1 MB) go to the writer's RAM buffer; the
        // remaining quarter becomes the link-access memory limit.
        long bufferMB = Math.max(1, (memoryLimitMB * 3) / 4);
        writerConfig.setRAMBufferSizeMB(bufferMB);
        this.linkAccessMemoryLimitMB = memoryLimitMB / 4;
    }

    Directory luceneDir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    if (doUpgrade) {
        if (DirectoryReader.indexExists(luceneDir)) {
            upgradeIndex(luceneDir);
        }
    }

    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writerConfig.setIndexDeletionPolicy(
            new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    IndexWriter freshWriter = new IndexWriter(luceneDir, writerConfig);
    freshWriter.commit();

    // Swap in the new writer and reset the related bookkeeping under the search lock.
    synchronized (this.searchSync) {
        this.writer = freshWriter;
        this.linkAccessTimes.clear();
        long nowMicros = Utils.getNowMicrosUtc();
        this.indexUpdateTimeMicros = nowMicros;
        this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    }
    return this.writer;
}

From source file:com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm.java

License:Apache License

/**
 * Creates the index writer on {@code dir} and the taxonomy writer on {@code taxoDir}.
 *
 * @throws IOException propagated from opening either writer
 */
private void createWriter() throws IOException {
    // No analyzer is supplied (null).
    // NOTE(review): presumably documents reach this writer already analyzed — confirm.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LATEST, null)
            .setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

    writer = new IndexWriter(dir, writerConfig);
    taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
}

From source file:com.xiaomi.linden.hadoop.indexing.reduce.ShardWriter.java

License:Apache License

/**
 * Prepares the permanent and temporary locations for a shard and opens the index and
 * taxonomy writers on the temporary locations.
 *
 * @param fs file system holding the permanent shard directories
 * @param shard shard whose directory names the permanent index location; the taxonomy
 *        location is the same path with {@code ".taxonomy"} appended
 * @param tempDir local base directory; the writers use its {@code index} and {@code taxo}
 *        sub-directories
 * @param conf configuration used to obtain the local file system and for trash handling
 * @throws IOException propagated from any file-system or writer operation
 */
public ShardWriter(FileSystem fs, Shard shard, String tempDir, Configuration conf) throws IOException {
    logger.info("Construct a shard writer");

    this.fs = fs;
    this.conf = conf;
    localFs = FileSystem.getLocal(conf);

    // Permanent locations on the target file system.
    perm = new Path(shard.getDirectory());
    taxoPerm = new Path(shard.getDirectory() + ".taxonomy");

    // Temporary working locations.
    final String localIndexDir = tempDir + "/" + "index";
    final String localTaxoDir = tempDir + "/" + "taxo";
    temp = new Path(localIndexDir);
    taxoTemp = new Path(localTaxoDir);

    // Remove a left-over temporary index directory.
    // NOTE(review): the File is built from temp.getName() rather than the full temp path —
    // confirm this resolves to the intended directory.
    if (localFs.exists(temp)) {
        final File staleTemp = new File(temp.getName());
        if (staleTemp.exists()) {
            LindenReducer.deleteDir(staleTemp);
        }
    }

    // An existing permanent directory is moved to trash first; either way it is (re)created.
    if (fs.exists(perm)) {
        moveToTrash(conf, perm);
    }
    fs.mkdirs(perm);

    if (fs.exists(taxoPerm)) {
        moveToTrash(conf, taxoPerm);
    }
    fs.mkdirs(taxoPerm);

    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LATEST, null);
    writerConfig.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    writer = new IndexWriter(FSDirectory.open(new File(localIndexDir)), writerConfig);
    taxoWriter = new DirectoryTaxonomyWriter(FSDirectory.open(new File(localTaxoDir)));
}

From source file:io.anserini.IndexerCW09B.java

License:Apache License

/**
 * Indexes every file returned by {@code discoverWarcFiles(docDir)} into a freshly created
 * index at {@code indexPath}, running one {@code IndexerThread} task per file on a fixed
 * thread pool.
 *
 * @param numThreads size of the fixed thread pool
 * @return {@code writer.maxDoc()} as read after the pool has terminated and before the final
 *         commit
 * @throws IOException propagated from opening, committing or closing the index
 * @throws InterruptedException declared for interface compatibility; an interrupt received
 *         while waiting for the pool cancels the remaining tasks and restores the interrupt
 *         flag instead of being thrown
 */
public int indexWithThreads(int numThreads) throws IOException, InterruptedException {

    System.out.println(
            "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");

    final Directory dir = FSDirectory.open(indexPath);

    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer());

    iwc.setSimilarity(new BM25Similarity());
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setUseCompoundFile(false);
    iwc.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, iwc);

    final ExecutorService executor = Executors.newFixedThreadPool(numThreads);

    for (Path f : discoverWarcFiles(docDir)) {
        executor.execute(new IndexerThread(writer, f));
    }

    // No warm-up delay is needed here: shutdown() only rejects NEW submissions, and every
    // task already handed to execute() still runs to completion.
    executor.shutdown();

    try {
        // Wait for existing tasks to terminate
        while (!executor.awaitTermination(5, TimeUnit.MINUTES)) {
            Thread.sleep(1000);
        }
    } catch (InterruptedException ie) {
        // (Re-)Cancel if current thread also interrupted
        executor.shutdownNow();
        // Preserve interrupt status
        Thread.currentThread().interrupt();
    }

    int numIndexed = writer.maxDoc();

    try {
        writer.commit();
    } finally {
        writer.close();
    }

    return numIndexed;
}