Example usage for org.apache.lucene.index ConcurrentMergeScheduler ConcurrentMergeScheduler

List of usage examples for org.apache.lucene.index ConcurrentMergeScheduler ConcurrentMergeScheduler

Introduction

In this page you can find the example usage for org.apache.lucene.index ConcurrentMergeScheduler ConcurrentMergeScheduler.

Prototype

public ConcurrentMergeScheduler() 

Source Link

Document

Sole constructor, with all settings set to default values.

Usage

From source file:com.flaptor.hounder.Index.java

License:Apache License

private Index(final File path, boolean create) {
    if (!create) {
        if (!path.exists()) {
            throw new IllegalArgumentException("The path passed to the costructor doesn't exist.");
        }/*ww  w. j av  a2 s.  c o m*/
        if (!path.isDirectory()) {
            throw new IllegalArgumentException("The path passed to the constructor is not a directory");
        }
    } else {
        if (path.exists()) {
            throw new IllegalArgumentException(
                    "Cannot create index on " + path.getAbsolutePath() + ". Path exists.");
        }
    }
    this.path = path;
    properties = new Properties();
    if (!create) {
        File propFile = new File(path.getAbsolutePath() + File.separator + "index.properties");
        if (!propFile.exists()) {
            throw new IllegalArgumentException(
                    "Cannot find the properties inside the index. Is the index corrupted?.");
        }
        if (!propFile.isFile()) {
            throw new IllegalArgumentException(
                    "There's no file named index.properties in the index. Maybe it is a directory?");
        }
        InputStream is = null;
        try {
            is = new FileInputStream(propFile);
            properties.load(is);
            indexDescriptor = new IndexDescriptor(properties.get("indexDescriptor").toString());
        } catch (IOException e) {
            logger.error(
                    "Exception while trying to load index.properties for index at " + path.getAbsolutePath(),
                    e);
        } catch (NullPointerException e) {
            logger.error("There is no index descriptor on " + propFile.getName()
                    + ". using default. Exception caused by " + e.getMessage(), e);
            indexDescriptor = IndexDescriptor.defaultDescriptor();
        } finally {
            com.flaptor.util.Execute.close(is, logger);
        }
    }
    Config config = Config.getConfig("common.properties");

    failOnLegacyParameters(config);

    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    cms.setMaxThreadCount(12);
    cms.setMergeThreadPriority(Thread.MIN_PRIORITY);
    this.mergeScheduler = cms;

    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    smallSegmentSize = config.getFloat("Index.smallSegmentSizeMB");
    mp.setMinMergeMB(smallSegmentSize);
    mp.setMergeFactor(config.getInt("Index.mergeFactor"));
    mergePolicy = mp;

    createAnalyzer();

    setUpDirectory(create);
}

From source file:com.qwazr.search.bench.LuceneCommonIndex.java

License:Apache License

LuceneCommonIndex(final Path rootDirectory, final String schemaName, final String indexName,
        final double ramBufferSize, final boolean useCompoundFile) throws IOException {

    final Path schemaDirectory = Files.createDirectory(rootDirectory.resolve(schemaName));
    this.indexDirectory = Files.createDirectory(schemaDirectory.resolve(indexName));
    this.luceneDirectory = indexDirectory.resolve("data");
    this.dataDirectory = FSDirectory.open(luceneDirectory);
    final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
            new PerFieldAnalyzerWrapper(new StandardAnalyzer()));
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexWriterConfig.setRAMBufferSizeMB(ramBufferSize);

    final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
    mergeScheduler.setMaxMergesAndThreads(MAX_SSD_MERGE_THREADS, MAX_SSD_MERGE_THREADS);
    indexWriterConfig.setMergeScheduler(mergeScheduler);
    indexWriterConfig.setUseCompoundFile(useCompoundFile);

    final TieredMergePolicy mergePolicy = new TieredMergePolicy();
    indexWriterConfig.setMergePolicy(mergePolicy);

    // We use snapshots deletion policy
    final SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy(
            indexWriterConfig.getIndexDeletionPolicy());
    indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy);

    this.indexWriter = new IndexWriter(this.dataDirectory, indexWriterConfig);
    this.localReplicator = new LocalReplicator();
}

From source file:io.anserini.embeddings.search.IndexW2V.java

License:Apache License

public void indexEmbeddings() throws IOException, InterruptedException {
    LOG.info("Starting indexer...");

    final Directory dir = FSDirectory.open(indexPath);
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setUseCompoundFile(false);/*from w w w  . j  a  v a2 s  . com*/
    config.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, config);
    Document document = new Document();
    BufferedReader bRdr = new BufferedReader(new FileReader(args.input));
    String line = null;
    bRdr.readLine();
    while ((line = bRdr.readLine()) != null) {
        String[] termEmbedding = line.trim().split("\t");
        document.add(new StringField(LuceneDocumentGenerator.FIELD_ID, termEmbedding[0], Field.Store.YES));
        document.add(new StoredField(LuceneDocumentGenerator.FIELD_BODY, termEmbedding[1]));
    }
}

From source file:io.anserini.index.IndexClueWeb09b.java

License:Apache License

public int indexWithThreads(int numThreads) throws IOException, InterruptedException {

    System.out.println(/*from w ww .  j  a  v  a  2 s  .co  m*/
            "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");

    final Directory dir = FSDirectory.open(indexPath);

    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer());

    iwc.setSimilarity(new BM25Similarity());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setUseCompoundFile(false);
    iwc.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, iwc);

    final ExecutorService executor = Executors.newFixedThreadPool(numThreads);

    List<Path> warcFiles = discoverWarcFiles(docDir);
    if (doclimit > 0 && warcFiles.size() < doclimit)
        warcFiles = warcFiles.subList(0, doclimit);

    for (Path f : warcFiles)
        executor.execute(new IndexerThread(writer, f));

    //add some delay to let some threads spawn by scheduler
    Thread.sleep(30000);
    executor.shutdown(); // Disable new tasks from being submitted

    try {
        // Wait for existing tasks to terminate
        while (!executor.awaitTermination(5, TimeUnit.MINUTES)) {
            Thread.sleep(1000);
        }
    } catch (InterruptedException ie) {
        // (Re-)Cancel if current thread also interrupted
        executor.shutdownNow();
        // Preserve interrupt status
        Thread.currentThread().interrupt();
    }

    int numIndexed = writer.maxDoc();

    try {
        writer.commit();
        if (optimize)
            writer.forceMerge(1);
    } finally {
        writer.close();
    }

    return numIndexed;
}

From source file:io.anserini.index.IndexCollection.java

License:Apache License

public void run() throws IOException, InterruptedException {
    final long start = System.nanoTime();
    LOG.info("Starting indexer...");

    int numThreads = args.threads;

    final Directory dir = FSDirectory.open(indexPath);
    final EnglishAnalyzer analyzer = args.keepStopwords ? new EnglishAnalyzer(CharArraySet.EMPTY_SET)
            : new EnglishAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new BM25Similarity());
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setRAMBufferSizeMB(args.memorybufferSize);
    config.setUseCompoundFile(false);//from ww  w .  j a  v a2 s  . c om
    config.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, config);

    final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads);
    final List<Path> segmentPaths = collection.getFileSegmentPaths();

    final int segmentCnt = segmentPaths.size();
    LOG.info(segmentCnt + " files found in " + collectionPath.toString());
    for (int i = 0; i < segmentCnt; i++) {
        executor.execute(new IndexerThread(writer, collection, segmentPaths.get(i)));
    }

    executor.shutdown();

    try {
        // Wait for existing tasks to terminate
        while (!executor.awaitTermination(1, TimeUnit.MINUTES)) {
            LOG.info(String.format("%.2f percent completed",
                    (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d));
        }
    } catch (InterruptedException ie) {
        // (Re-)Cancel if current thread also interrupted
        executor.shutdownNow();
        // Preserve interrupt status
        Thread.currentThread().interrupt();
    }

    if (segmentCnt != executor.getCompletedTaskCount()) {
        throw new RuntimeException("totalFiles = " + segmentCnt + " is not equal to completedTaskCount =  "
                + executor.getCompletedTaskCount());
    }

    int numIndexed = writer.maxDoc();

    try {
        writer.commit();
        if (args.optimize)
            writer.forceMerge(1);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            // It is possible that this happens... but nothing much we can do at this point,
            // so just log the error and move on.
            LOG.error(e);
        }
    }

    LOG.info("Indexed documents: " + counters.indexedDocuments.get());
    LOG.info("Empty documents: " + counters.emptyDocuments.get());
    LOG.info("Errors: " + counters.errors.get());

    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
    LOG.info("Total " + numIndexed + " documents indexed in "
            + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
}

From source file:io.anserini.index.IndexWebCollection.java

License:Apache License

public int indexWithThreads(int numThreads) throws IOException, InterruptedException {

    LOG.info("Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");

    final Directory dir = FSDirectory.open(indexPath);

    final IndexWriterConfig iwc = new IndexWriterConfig(new EnglishAnalyzer());

    iwc.setSimilarity(new BM25Similarity());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(512);//from ww  w .  j  av  a2 s .  c  o  m
    iwc.setUseCompoundFile(false);
    iwc.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, iwc);

    final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads);
    final String suffix = Collection.GOV2.equals(collection) ? ".gz" : ".warc.gz";
    final Deque<Path> warcFiles = discoverWarcFiles(docDir, suffix);

    if (doclimit > 0 && warcFiles.size() < doclimit)
        for (int i = doclimit; i < warcFiles.size(); i++)
            warcFiles.removeFirst();

    long totalWarcFiles = warcFiles.size();
    LOG.info(totalWarcFiles + " many " + suffix + " files found under the docs path : " + docDir.toString());

    for (int i = 0; i < 2000; i++) {
        if (!warcFiles.isEmpty())
            executor.execute(new IndexerThread(writer, warcFiles.removeFirst()));
        else {
            if (!executor.isShutdown()) {
                Thread.sleep(30000);
                executor.shutdown();
            }
            break;
        }
    }

    long first = 0;
    //add some delay to let some threads spawn by scheduler
    Thread.sleep(30000);

    try {
        // Wait for existing tasks to terminate
        while (!executor.awaitTermination(1, TimeUnit.MINUTES)) {

            final long completedTaskCount = executor.getCompletedTaskCount();

            LOG.info(String.format("%.2f percentage completed",
                    (double) completedTaskCount / totalWarcFiles * 100.0d));

            if (!warcFiles.isEmpty())
                for (long i = first; i < completedTaskCount; i++) {
                    if (!warcFiles.isEmpty())
                        executor.execute(new IndexerThread(writer, warcFiles.removeFirst()));
                    else {
                        if (!executor.isShutdown())
                            executor.shutdown();
                    }
                }

            first = completedTaskCount;
            Thread.sleep(1000);
        }
    } catch (InterruptedException ie) {
        // (Re-)Cancel if current thread also interrupted
        executor.shutdownNow();
        // Preserve interrupt status
        Thread.currentThread().interrupt();
    }

    if (totalWarcFiles != executor.getCompletedTaskCount())
        throw new RuntimeException("totalWarcFiles = " + totalWarcFiles
                + " is not equal to completedTaskCount =  " + executor.getCompletedTaskCount());

    int numIndexed = writer.maxDoc();

    try {
        writer.commit();
        if (optimize)
            writer.forceMerge(1);
    } finally {
        writer.close();
    }

    return numIndexed;
}

From source file:io.anserini.IndexerCW09B.java

License:Apache License

public int indexWithThreads(int numThreads) throws IOException, InterruptedException {

    System.out.println(//from w  w  w.  j a  va  2 s .  co m
            "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");

    final Directory dir = FSDirectory.open(indexPath);

    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer());

    iwc.setSimilarity(new BM25Similarity());
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setUseCompoundFile(false);
    iwc.setMergeScheduler(new ConcurrentMergeScheduler());

    final IndexWriter writer = new IndexWriter(dir, iwc);

    final ExecutorService executor = Executors.newFixedThreadPool(numThreads);

    for (Path f : discoverWarcFiles(docDir))
        executor.execute(new IndexerThread(writer, f));

    //add some delay to let some threads spawn by scheduler
    Thread.sleep(30000);
    executor.shutdown(); // Disable new tasks from being submitted

    try {
        // Wait for existing tasks to terminate
        while (!executor.awaitTermination(5, TimeUnit.MINUTES)) {
            Thread.sleep(1000);
        }
    } catch (InterruptedException ie) {
        // (Re-)Cancel if current thread also interrupted
        executor.shutdownNow();
        // Preserve interrupt status
        Thread.currentThread().interrupt();
    }

    int numIndexed = writer.maxDoc();

    try {
        writer.commit();
    } finally {
        writer.close();
    }

    return numIndexed;
}

From source file:org.apache.nifi.provenance.lucene.SimpleIndexManager.java

License:Apache License

private IndexWriterCount createWriter(final File indexDirectory) throws IOException {
    final List<Closeable> closeables = new ArrayList<>();
    final Directory directory = FSDirectory.open(indexDirectory);
    closeables.add(directory);/*  w  w w .  j  a v a  2  s  . co m*/

    try {
        final Analyzer analyzer = new StandardAnalyzer();
        closeables.add(analyzer);

        final IndexWriterConfig config = new IndexWriterConfig(LuceneUtil.LUCENE_VERSION, analyzer);

        final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
        final int mergeThreads = repoConfig.getConcurrentMergeThreads();
        mergeScheduler.setMaxMergesAndThreads(mergeThreads, mergeThreads);
        config.setMergeScheduler(mergeScheduler);

        final IndexWriter indexWriter = new IndexWriter(directory, config);
        final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, indexDirectory);

        final IndexWriterCount writerCount = new IndexWriterCount(eventIndexWriter, analyzer, directory, 1,
                false);
        logger.debug("Providing new index writer for {}", indexDirectory);
        return writerCount;
    } catch (final IOException ioe) {
        for (final Closeable closeable : closeables) {
            try {
                closeable.close();
            } catch (final IOException ioe2) {
                ioe.addSuppressed(ioe2);
            }
        }

        throw ioe;
    }
}

From source file:org.apache.nifi.provenance.lucene.StandardIndexManager.java

License:Apache License

private IndexWriterCount createWriter(final File indexDirectory) throws IOException {
    final List<Closeable> closeables = new ArrayList<>();
    final Directory directory = FSDirectory.open(indexDirectory.toPath());
    closeables.add(directory);//from  w w w .j  av a2 s.c  o m

    try {
        final Analyzer analyzer = new StandardAnalyzer();
        closeables.add(analyzer);

        final IndexWriterConfig config = new IndexWriterConfig(analyzer);

        final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
        final int mergeThreads = repoConfig.getConcurrentMergeThreads();
        mergeScheduler.setMaxMergesAndThreads(mergeThreads, mergeThreads);
        config.setMergeScheduler(mergeScheduler);

        final IndexWriter indexWriter = new IndexWriter(directory, config);
        final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, indexDirectory);

        final IndexWriterCount writerCount = new IndexWriterCount(eventIndexWriter, analyzer, directory, 1,
                false);
        logger.debug("Providing new index writer for {}", indexDirectory);
        return writerCount;
    } catch (final IOException ioe) {
        for (final Closeable closeable : closeables) {
            try {
                closeable.close();
            } catch (final IOException ioe2) {
                ioe.addSuppressed(ioe2);
            }
        }

        throw ioe;
    }
}

From source file:org.compass.core.lucene.engine.merge.scheduler.ConcurrentMergeSchedulerProvider.java

License:Apache License

/**
 * Returns Lucene {@link org.apache.lucene.index.ConcurrentMergeScheduler} allowing to configure
 * using {@link org.compass.core.lucene.LuceneEnvironment.MergeScheduler.Concurrent}.
 */// w w  w  . ja v  a  2  s.  com
public MergeScheduler create(LuceneSearchEngineIndexManager indexManager, CompassSettings settings)
        throws SearchEngineException {
    ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
    mergeScheduler.setMaxThreadCount(
            settings.getSettingAsInt(LuceneEnvironment.MergeScheduler.Concurrent.MAX_THREAD_COUNT, 3));
    mergeScheduler.setMergeThreadPriority(settings.getSettingAsInt(
            LuceneEnvironment.MergeScheduler.Concurrent.THREAD_PRIORITY, Thread.NORM_PRIORITY));
    return mergeScheduler;
}