Example usage for org.apache.lucene.index IndexWriter getConfig

List of usage examples for org.apache.lucene.index IndexWriter getConfig

Introduction

On this page you can find usage examples for org.apache.lucene.index IndexWriter getConfig.

Prototype

public LiveIndexWriterConfig getConfig() 

Source Link

Document

Returns a LiveIndexWriterConfig, which can be used to query the IndexWriter's current settings, as well as modify "live" ones.

Usage

From source file:org.archive.index.AsAReference.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*from ww w .j a v  a  2  s .c o m*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 4 would mean
                // February 17, 1, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:org.bidtime.lucene.utils.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 * <p>
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * <p>
 * Only the "path" and "modified" fields are added to each document; no
 * "contents" field is added in this variant, so the file's text is not
 * indexed.
 *
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // unreadable entries are skipped outright
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        String[] children = file.list();
        // a null listing means an IO error occurred
        if (children == null) {
            return;
        }
        for (String child : children) {
            indexDocs(writer, new File(file, child));
        }
        return;
    }

    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this
        // exception with an "access denied" message;
        // checking if the file can be read doesn't help
        return;
    }

    try {
        Document doc = new Document();

        // "path": indexed (searchable) as a single untokenized term, without
        // term frequency or positional information; the value is not stored.
        doc.add(new StringField("path", file.getPath(), Field.Store.NO));

        // "modified": last-modified time as a LongField (efficiently
        // filterable with NumericRangeFilter). Milli-second resolution is
        // often too fine; a coarser number such as 2011021714
        // (February 17, 2011, 2-3 PM) could be used instead.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // The stream opened above is not read here; it is only closed in
        // the finally block below.

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (!freshIndex) {
            // Existing index (an old copy of this document may have been
            // indexed), so replace any document matching the exact path:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        } else {
            // New index, so no old document can be there; just add it:
            System.out.println("adding " + file);
            writer.addDocument(doc);
        }
    } finally {
        fis.close();
    }
}

From source file:org.Demo.IndexFiles.java

License:Apache License

/**
 * Indexes a single document.
 * <p>
 * The document gets a stored "path" field, a stored "title" field taken from the
 * first line of the file, a "modified" point field, and a tokenized (not stored)
 * "contents" field read from the file as UTF-8. When the file is empty there is no
 * first line, and the "title" field is omitted.
 *
 * @param writer       writer to the index where the document will be added or updated
 * @param file         the file to index
 * @param lastModified value indexed in the "modified" field
 * @throws IOException if the file cannot be opened or read, or the writer fails
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // The title is the first line of the file.  It is read through a separate
        // stream so that the "contents" reader below still starts at the beginning.
        // try-with-resources closes that stream even if readLine() throws.
        String title;
        try (BufferedReader titleReader = new BufferedReader(
                new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8))) {
            title = titleReader.readLine();
        }

        // readLine() returns null for an empty file; a field cannot hold a null
        // value, so the title is simply left out in that case.
        if (title != null) {
            Field titleField = new StringField("title", title, Field.Store.YES);
            doc.add(titleField);
        }

        // Add the last modified date of the file a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // The stream is decoded as UTF-8; if the file uses another encoding,
        // searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java

License:Apache License

@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    try (InternalLock _ = readLock.acquire()) {
        this.indexingBufferSize = indexingBufferSize;
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }// w w w  .  ja v  a  2 s.c o m
    }
    if (preValue.bytes() != indexingBufferSize.bytes()) {
        // its inactive, make sure we do a full flush in this case, since the memory
        // changes only after a "data" change has happened to the writer
        if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
                && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
            logger.debug("updating index_buffer_size from [{}] to (inactive) [{}]", preValue,
                    indexingBufferSize);
            try {
                flush(new Flush().type(Flush.Type.COMMIT));
            } catch (EngineClosedException e) {
                // ignore
            } catch (FlushNotAllowedEngineException e) {
                // ignore
            } catch (Throwable e) {
                logger.warn("failed to flush after setting shard to inactive", e);
            }
        } else {
            logger.debug("updating index_buffer_size from [{}] to [{}]", preValue, indexingBufferSize);
        }
    }
}

From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java

License:Apache License

@Override
public void optimize(Optimize optimize) throws EngineException {
    if (optimizeMutex.compareAndSet(false, true)) {
        try (InternalLock _ = readLock.acquire()) {
            final IndexWriter writer = currentIndexWriter();

            /*/*  w w w  .jav  a  2  s  .c om*/
             * The way we implement upgrades is a bit hackish in the sense that we set an instance
             * variable and that this setting will thus apply to the next forced merge that will be run.
             * This is ok because (1) this is the only place we call forceMerge, (2) we have a single
             * thread for optimize, and the 'optimizeMutex' guarding this code, and (3) ConcurrentMergeScheduler
             * syncs calls to findForcedMerges.
             */
            MergePolicy mp = writer.getConfig().getMergePolicy();
            assert mp instanceof ElasticsearchMergePolicy : "MergePolicy is " + mp.getClass().getName();
            if (optimize.upgrade()) {
                ((ElasticsearchMergePolicy) mp).setUpgradeInProgress(true);
            }

            if (optimize.onlyExpungeDeletes()) {
                writer.forceMergeDeletes(false);
            } else if (optimize.maxNumSegments() <= 0) {
                writer.maybeMerge();
                possibleMergeNeeded = false;
            } else {
                writer.forceMerge(optimize.maxNumSegments(), false);
            }
        } catch (Throwable t) {
            maybeFailEngine(t, "optimize");
            throw new OptimizeFailedEngineException(shardId, t);
        } finally {
            optimizeMutex.set(false);
        }
    }

    // wait for the merges outside of the read lock
    if (optimize.waitForMerge()) {
        waitForMerges(optimize.flush());
    } else if (optimize.flush()) {
        // we only need to monitor merges for async calls if we are going to flush
        threadPool.executor(ThreadPool.Names.OPTIMIZE).execute(new AbstractRunnable() {
            @Override
            public void run() {
                try {
                    waitForMerges(true);
                } catch (Exception e) {
                    logger.error("Exception while waiting for merges asynchronously after optimize", e);
                }
            }
        });
    }
}

From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java

License:Apache License

/**
 * Collects segment statistics: one entry per leaf reader of a freshly acquired
 * searcher, plus the memory used by the version map and the index writer, and the
 * writer's configured RAM buffer size converted to bytes.
 *
 * @return the populated statistics object
 */
@Override
public SegmentsStats segmentsStats() {
    // Does ensureOpen for us:
    final IndexWriter writer = currentIndexWriter();
    assert writer != null;
    try (final Searcher searcher = acquireSearcher("segments_stats")) {
        final SegmentsStats result = new SegmentsStats();
        for (AtomicReaderContext leaf : searcher.reader().leaves()) {
            result.add(1, getReaderRamBytesUsed(leaf));
        }
        result.addVersionMapMemoryInBytes(versionMap.ramBytesUsed());
        result.addIndexWriterMemoryInBytes(writer.ramBytesUsed());
        // the RAM buffer size is configured in megabytes; report it in bytes
        final long maxWriterBytes = (long) (writer.getConfig().getRAMBufferSizeMB() * 1024 * 1024);
        result.addIndexWriterMaxMemoryInBytes(maxWriterBytes);
        return result;
    }
}

From source file:org.elasticsearch.index.engine.internal.InternalEngine.java

License:Apache License

/**
 * Stores the new indexing buffer size and pushes it to the live writer configuration
 * while holding the read lock. If the byte size changed and the new value is the
 * {@code Engine.INACTIVE_SHARD_INDEXING_BUFFER} marker (and the old one was not), a
 * NEW_WRITER flush is attempted; flush failures are ignored or logged.
 *
 * @param indexingBufferSize the new indexing buffer size
 */
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    final ByteSizeValue previous = this.indexingBufferSize;
    rwl.readLock().lock();
    try {
        this.indexingBufferSize = indexingBufferSize;
        final IndexWriter liveWriter = this.indexWriter;
        if (liveWriter != null) {
            liveWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    } finally {
        rwl.readLock().unlock();
    }

    // nothing more to do when the effective size is unchanged
    if (previous.bytes() == indexingBufferSize.bytes()) {
        return;
    }

    final boolean becameInactive = indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
            && previous != Engine.INACTIVE_SHARD_INDEXING_BUFFER;
    if (!becameInactive) {
        logger.debug("updating index_buffer_size from [{}] to [{}]", previous, indexingBufferSize);
        return;
    }

    // the shard went inactive: do a full flush, since the memory
    // changes only after a "data" change has happened to the writer
    logger.debug("updating index_buffer_size from [{}] to (inactive) [{}]", previous,
            indexingBufferSize);
    try {
        flush(new Flush().type(Flush.Type.NEW_WRITER));
    } catch (EngineClosedException e) {
        // ignore
    } catch (FlushNotAllowedEngineException e) {
        // ignore
    } catch (Throwable e) {
        logger.warn("failed to flush after setting shard to inactive", e);
    }
}

From source file:org.elasticsearch.index.engine.robin.RobinEngine.java

License:Apache License

/**
 * Stores the new indexing buffer size, capped at 2048 MB, and pushes it to the live
 * writer configuration while holding the read lock. If the requested value is the
 * {@code Engine.INACTIVE_SHARD_INDEXING_BUFFER} marker and the previous one was not,
 * a full flush is attempted; a failing flush is logged and not propagated.
 *
 * @param indexingBufferSize the requested indexing buffer size
 */
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    final ByteSizeValue previous = this.indexingBufferSize;
    rwl.readLock().lock();
    try {
        // LUCENE MONITOR - If this restriction is removed from Lucene, remove it from here
        this.indexingBufferSize = indexingBufferSize.mbFrac() > 2048.0
                ? new ByteSizeValue(2048, ByteSizeUnit.MB)
                : indexingBufferSize;
        final IndexWriter liveWriter = this.indexWriter;
        if (liveWriter != null) {
            liveWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    } finally {
        rwl.readLock().unlock();
    }

    final boolean becameInactive = indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
            && previous != Engine.INACTIVE_SHARD_INDEXING_BUFFER;
    if (!becameInactive) {
        return;
    }

    // the shard went inactive: do a full flush, since the memory
    // changes only after a "data" change has happened to the writer
    try {
        flush(new Flush().full(true));
    } catch (Exception e) {
        logger.warn("failed to flush after setting shard to inactive", e);
    }
}

From source file:org.elasticsearch.index.merge.Merges.java

License:Apache License

/**
 * Delegates to {@link org.apache.lucene.index.IndexWriter#maybeMerge()}. When the writer's
 * scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}, merges are
 * explicitly enabled for the duration of the call and disabled again afterwards, even if
 * the call throws.
 */
public static void maybeMerge(IndexWriter writer) throws IOException {
    final MergeScheduler scheduler = writer.getConfig().getMergeScheduler();
    if (!(scheduler instanceof EnableMergeScheduler)) {
        writer.maybeMerge();
        return;
    }

    final EnableMergeScheduler toggle = (EnableMergeScheduler) scheduler;
    toggle.enableMerge();
    try {
        writer.maybeMerge();
    } finally {
        toggle.disableMerge();
    }
}

From source file:org.elasticsearch.index.merge.Merges.java

License:Apache License

/**
 * Delegates to {@link org.apache.lucene.index.IndexWriter#forceMerge(int, boolean)}. When the
 * writer's scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}, merges
 * are explicitly enabled for the duration of the call and disabled again afterwards, even if
 * the call throws.
 */
public static void forceMerge(IndexWriter writer, int maxNumSegments, boolean doWait) throws IOException {
    final MergeScheduler scheduler = writer.getConfig().getMergeScheduler();
    if (!(scheduler instanceof EnableMergeScheduler)) {
        writer.forceMerge(maxNumSegments, doWait);
        return;
    }

    final EnableMergeScheduler toggle = (EnableMergeScheduler) scheduler;
    toggle.enableMerge();
    try {
        writer.forceMerge(maxNumSegments, doWait);
    } finally {
        toggle.disableMerge();
    }
}