List of usage examples for org.apache.lucene.index.IndexWriter.getConfig()
public LiveIndexWriterConfig getConfig()
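getConfig() returns the writer's LiveIndexWriterConfig: a live view of the configuration the writer was opened with. A few settings, such as the indexing RAM buffer size, can be changed through it on an already-open writer, which is what the Elasticsearch examples below rely on. A minimal sketch of reading and adjusting that config (assumes Lucene 5+ style APIs; the index path is an arbitrary placeholder):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LiveIndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class GetConfigExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/example-index" is a placeholder path for this sketch
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // the returned LiveIndexWriterConfig tracks the writer's current settings
            LiveIndexWriterConfig config = writer.getConfig();
            System.out.println("open mode:  " + config.getOpenMode());
            System.out.println("RAM buffer: " + config.getRAMBufferSizeMB() + " MB");
            // a subset of settings is live-updatable on an open writer
            config.setRAMBufferSizeMB(64.0);
        }
    }
}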
From source file:org.archive.index.AsAReference.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with
                // an "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.bidtime.lucene.utils.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with
                // an "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.NO);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                // doc.add(new TextField("contents", new BufferedReader(
                //         new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.Demo.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Read the first line of the file and store it as a "title" field:
        BufferedReader buff = new BufferedReader(
                new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8));
        String title = buff.readLine();
        buff.close();
        Field titleField = new StringField("title", title, Field.Store.YES);
        doc.add(titleField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
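All three indexing variants above branch on the OpenMode that writer.getConfig().getOpenMode() reports back. That mode is fixed when the writer is constructed; a hedged sketch of the typical setup, modeled on Lucene's IndexFiles demo (the create flag and index path here are illustrative, not from the sources above):

Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

boolean create = false; // illustrative: true rebuilds the index from scratch
if (create) {
    // Create a new index in the directory, removing any previously indexed documents:
    iwc.setOpenMode(OpenMode.CREATE);
} else {
    // Add new documents to an existing index, creating it if it does not exist:
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}

try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("/tmp/example-index")), iwc)) {
    // writer.getConfig().getOpenMode() now reports the mode chosen above,
    // which is how indexDocs/indexDoc decide between addDocument and updateDocument
}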
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    try (InternalLock _ = readLock.acquire()) {
        this.indexingBufferSize = indexingBufferSize;
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    }
    if (preValue.bytes() != indexingBufferSize.bytes()) {
        // it's inactive, make sure we do a full flush in this case, since the memory
        // changes only after a "data" change has happened to the writer
        if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
                && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
            logger.debug("updating index_buffer_size from [{}] to (inactive) [{}]", preValue, indexingBufferSize);
            try {
                flush(new Flush().type(Flush.Type.COMMIT));
            } catch (EngineClosedException e) {
                // ignore
            } catch (FlushNotAllowedEngineException e) {
                // ignore
            } catch (Throwable e) {
                logger.warn("failed to flush after setting shard to inactive", e);
            }
        } else {
            logger.debug("updating index_buffer_size from [{}] to [{}]", preValue, indexingBufferSize);
        }
    }
}
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
@Override
public void optimize(Optimize optimize) throws EngineException {
    if (optimizeMutex.compareAndSet(false, true)) {
        try (InternalLock _ = readLock.acquire()) {
            final IndexWriter writer = currentIndexWriter();
            /*
             * The way we implement upgrades is a bit hackish in the sense that we set an instance
             * variable and that this setting will thus apply to the next forced merge that will be run.
             * This is ok because (1) this is the only place we call forceMerge, (2) we have a single
             * thread for optimize, and the 'optimizeMutex' guarding this code, and (3) ConcurrentMergeScheduler
             * syncs calls to findForcedMerges.
             */
            MergePolicy mp = writer.getConfig().getMergePolicy();
            assert mp instanceof ElasticsearchMergePolicy : "MergePolicy is " + mp.getClass().getName();
            if (optimize.upgrade()) {
                ((ElasticsearchMergePolicy) mp).setUpgradeInProgress(true);
            }
            if (optimize.onlyExpungeDeletes()) {
                writer.forceMergeDeletes(false);
            } else if (optimize.maxNumSegments() <= 0) {
                writer.maybeMerge();
                possibleMergeNeeded = false;
            } else {
                writer.forceMerge(optimize.maxNumSegments(), false);
            }
        } catch (Throwable t) {
            maybeFailEngine(t, "optimize");
            throw new OptimizeFailedEngineException(shardId, t);
        } finally {
            optimizeMutex.set(false);
        }
    }
    // wait for the merges outside of the read lock
    if (optimize.waitForMerge()) {
        waitForMerges(optimize.flush());
    } else if (optimize.flush()) {
        // we only need to monitor merges for async calls if we are going to flush
        threadPool.executor(ThreadPool.Names.OPTIMIZE).execute(new AbstractRunnable() {
            @Override
            public void run() {
                try {
                    waitForMerges(true);
                } catch (Exception e) {
                    logger.error("Exception while waiting for merges asynchronously after optimize", e);
                }
            }
        });
    }
}
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
@Override
public SegmentsStats segmentsStats() {
    // Does ensureOpen for us:
    final IndexWriter indexWriter = currentIndexWriter();
    assert indexWriter != null;
    try (final Searcher searcher = acquireSearcher("segments_stats")) {
        SegmentsStats stats = new SegmentsStats();
        for (AtomicReaderContext reader : searcher.reader().leaves()) {
            stats.add(1, getReaderRamBytesUsed(reader));
        }
        stats.addVersionMapMemoryInBytes(versionMap.ramBytesUsed());
        stats.addIndexWriterMemoryInBytes(indexWriter.ramBytesUsed());
        stats.addIndexWriterMaxMemoryInBytes(
                (long) (indexWriter.getConfig().getRAMBufferSizeMB() * 1024 * 1024));
        return stats;
    }
}
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    rwl.readLock().lock();
    try {
        this.indexingBufferSize = indexingBufferSize;
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    } finally {
        rwl.readLock().unlock();
    }
    if (preValue.bytes() != indexingBufferSize.bytes()) {
        // it's inactive, make sure we do a full flush in this case, since the memory
        // changes only after a "data" change has happened to the writer
        if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
                && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
            logger.debug("updating index_buffer_size from [{}] to (inactive) [{}]", preValue, indexingBufferSize);
            try {
                flush(new Flush().type(Flush.Type.NEW_WRITER));
            } catch (EngineClosedException e) {
                // ignore
            } catch (FlushNotAllowedEngineException e) {
                // ignore
            } catch (Throwable e) {
                logger.warn("failed to flush after setting shard to inactive", e);
            }
        } else {
            logger.debug("updating index_buffer_size from [{}] to [{}]", preValue, indexingBufferSize);
        }
    }
}
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    rwl.readLock().lock();
    try {
        // LUCENE MONITOR - If this restriction is removed from Lucene, remove it from here
        if (indexingBufferSize.mbFrac() > 2048.0) {
            this.indexingBufferSize = new ByteSizeValue(2048, ByteSizeUnit.MB);
        } else {
            this.indexingBufferSize = indexingBufferSize;
        }
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    } finally {
        rwl.readLock().unlock();
    }
    // it's inactive, make sure we do a full flush in this case, since the memory
    // changes only after a "data" change has happened to the writer
    if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
            && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
        try {
            flush(new Flush().full(true));
        } catch (Exception e) {
            logger.warn("failed to flush after setting shard to inactive", e);
        }
    }
}
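The three engine implementations above all exercise the same property of getConfig(): the RAM buffer size is one of the few settings that takes effect on a live writer, with no reopen required. A condensed sketch of that shared pattern, stripped of the Elasticsearch locking and flush logic (the 2048 MB clamp mirrors the Lucene restriction RobinEngine guards against):

/**
 * Sketch only: apply a new indexing RAM buffer size to an already-open writer.
 * The clamp mirrors RobinEngine's guard against Lucene's 2048 MB buffer limit.
 */
static void resizeIndexingBuffer(IndexWriter writer, double megabytes) {
    double clamped = Math.min(megabytes, 2048.0);
    // LiveIndexWriterConfig applies this immediately to the running writer
    writer.getConfig().setRAMBufferSizeMB(clamped);
}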
From source file:org.elasticsearch.index.merge.Merges.java
License:Apache License
/**
 * See {@link org.apache.lucene.index.IndexWriter#maybeMerge()}, with the additional
 * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
 */
public static void maybeMerge(IndexWriter writer) throws IOException {
    MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
    if (mergeScheduler instanceof EnableMergeScheduler) {
        ((EnableMergeScheduler) mergeScheduler).enableMerge();
        try {
            writer.maybeMerge();
        } finally {
            ((EnableMergeScheduler) mergeScheduler).disableMerge();
        }
    } else {
        writer.maybeMerge();
    }
}
From source file:org.elasticsearch.index.merge.Merges.java
License:Apache License
/**
 * See {@link org.apache.lucene.index.IndexWriter#forceMerge(int, boolean)}, with the additional
 * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
 */
public static void forceMerge(IndexWriter writer, int maxNumSegments, boolean doWait) throws IOException {
    MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
    if (mergeScheduler instanceof EnableMergeScheduler) {
        ((EnableMergeScheduler) mergeScheduler).enableMerge();
        try {
            writer.forceMerge(maxNumSegments, doWait);
        } finally {
            ((EnableMergeScheduler) mergeScheduler).disableMerge();
        }
    } else {
        writer.forceMerge(maxNumSegments, doWait);
    }
}
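A hedged usage sketch for these two helpers: both calls below behave the same whether or not the writer was opened with Elasticsearch's EnableMergeScheduler, since the helpers fall back to invoking the IndexWriter directly.

// opportunistically merge if the merge policy finds pending work
Merges.maybeMerge(writer);

// force-merge down to a single segment and block until it completes
Merges.forceMerge(writer, 1, true);

Reading the scheduler through writer.getConfig().getMergeScheduler() is what lets these static utilities toggle merging without holding a reference to the owning engine.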