List of usage examples for org.apache.lucene.index.IndexWriter.getConfig()
public LiveIndexWriterConfig getConfig()
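getConfig() returns the writer's LiveIndexWriterConfig: a live view of the configuration the writer was opened with. A few settings, such as the indexing RAM buffer size, can be changed through it on an already-open writer, which is what the Elasticsearch examples below rely on. A minimal sketch of reading and adjusting that config (assumes Lucene 5+ style APIs; the index path is an arbitrary placeholder):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LiveIndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class GetConfigExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/example-index" is a placeholder path for this sketch
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // the returned LiveIndexWriterConfig tracks the writer's current settings
            LiveIndexWriterConfig config = writer.getConfig();
            System.out.println("open mode:  " + config.getOpenMode());
            System.out.println("RAM buffer: " + config.getRAMBufferSizeMB() + " MB");
            // a subset of settings is live-updatable on an open writer
            config.setRAMBufferSizeMB(64.0);
        }
    }
}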
From source file:org.archive.index.AsAReference.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with
                // an "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.bidtime.lucene.utils.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with
                // an "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.NO);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                // doc.add(new TextField("contents", new BufferedReader(
                //         new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.Demo.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Read the first line of the file and store it as a "title" field:
        BufferedReader buff = new BufferedReader(
                new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8));
        String title = buff.readLine();
        buff.close();
        Field titleField = new StringField("title", title, Field.Store.YES);
        doc.add(titleField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
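All three indexing variants above branch on the OpenMode that writer.getConfig().getOpenMode() reports back. That mode is fixed when the writer is constructed; a hedged sketch of the typical setup, modeled on Lucene's IndexFiles demo (the create flag and index path here are illustrative, not from the sources above):

Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

boolean create = false; // illustrative: true rebuilds the index from scratch
if (create) {
    // Create a new index in the directory, removing any previously indexed documents:
    iwc.setOpenMode(OpenMode.CREATE);
} else {
    // Add new documents to an existing index, creating it if it does not exist:
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}

try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("/tmp/example-index")), iwc)) {
    // writer.getConfig().getOpenMode() now reports the mode chosen above,
    // which is how indexDocs/indexDoc decide between addDocument and updateDocument
}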
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    try (InternalLock _ = readLock.acquire()) {
        this.indexingBufferSize = indexingBufferSize;
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    }
    if (preValue.bytes() != indexingBufferSize.bytes()) {
        // it's inactive, make sure we do a full flush in this case, since the memory
        // changes only after a "data" change has happened to the writer
        if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
                && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
            logger.debug("updating index_buffer_size from [{}] to (inactive) [{}]", preValue, indexingBufferSize);
            try {
                flush(new Flush().type(Flush.Type.COMMIT));
            } catch (EngineClosedException e) {
                // ignore
            } catch (FlushNotAllowedEngineException e) {
                // ignore
            } catch (Throwable e) {
                logger.warn("failed to flush after setting shard to inactive", e);
            }
        } else {
            logger.debug("updating index_buffer_size from [{}] to [{}]", preValue, indexingBufferSize);
        }
    }
}
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
@Override
public void optimize(Optimize optimize) throws EngineException {
    if (optimizeMutex.compareAndSet(false, true)) {
        try (InternalLock _ = readLock.acquire()) {
            final IndexWriter writer = currentIndexWriter();
            /*
             * The way we implement upgrades is a bit hackish in the sense that we set an instance
             * variable and that this setting will thus apply to the next forced merge that will be run.
             * This is ok because (1) this is the only place we call forceMerge, (2) we have a single
             * thread for optimize, and the 'optimizeMutex' guarding this code, and (3) ConcurrentMergeScheduler
             * syncs calls to findForcedMerges.
             */
            MergePolicy mp = writer.getConfig().getMergePolicy();
            assert mp instanceof ElasticsearchMergePolicy : "MergePolicy is " + mp.getClass().getName();
            if (optimize.upgrade()) {
                ((ElasticsearchMergePolicy) mp).setUpgradeInProgress(true);
            }
            if (optimize.onlyExpungeDeletes()) {
                writer.forceMergeDeletes(false);
            } else if (optimize.maxNumSegments() <= 0) {
                writer.maybeMerge();
                possibleMergeNeeded = false;
            } else {
                writer.forceMerge(optimize.maxNumSegments(), false);
            }
        } catch (Throwable t) {
            maybeFailEngine(t, "optimize");
            throw new OptimizeFailedEngineException(shardId, t);
        } finally {
            optimizeMutex.set(false);
        }
    }
    // wait for the merges outside of the read lock
    if (optimize.waitForMerge()) {
        waitForMerges(optimize.flush());
    } else if (optimize.flush()) {
        // we only need to monitor merges for async calls if we are going to flush
        threadPool.executor(ThreadPool.Names.OPTIMIZE).execute(new AbstractRunnable() {
            @Override
            public void run() {
                try {
                    waitForMerges(true);
                } catch (Exception e) {
                    logger.error("Exception while waiting for merges asynchronously after optimize", e);
                }
            }
        });
    }
}
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
@Override
public SegmentsStats segmentsStats() {
    // Does ensureOpen for us:
    final IndexWriter indexWriter = currentIndexWriter();
    assert indexWriter != null;
    try (final Searcher searcher = acquireSearcher("segments_stats")) {
        SegmentsStats stats = new SegmentsStats();
        for (AtomicReaderContext reader : searcher.reader().leaves()) {
            stats.add(1, getReaderRamBytesUsed(reader));
        }
        stats.addVersionMapMemoryInBytes(versionMap.ramBytesUsed());
        stats.addIndexWriterMemoryInBytes(indexWriter.ramBytesUsed());
        stats.addIndexWriterMaxMemoryInBytes(
                (long) (indexWriter.getConfig().getRAMBufferSizeMB() * 1024 * 1024));
        return stats;
    }
}
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    rwl.readLock().lock();
    try {
        this.indexingBufferSize = indexingBufferSize;
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    } finally {
        rwl.readLock().unlock();
    }
    if (preValue.bytes() != indexingBufferSize.bytes()) {
        // it's inactive, make sure we do a full flush in this case, since the memory
        // changes only after a "data" change has happened to the writer
        if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
                && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
            logger.debug("updating index_buffer_size from [{}] to (inactive) [{}]", preValue, indexingBufferSize);
            try {
                flush(new Flush().type(Flush.Type.NEW_WRITER));
            } catch (EngineClosedException e) {
                // ignore
            } catch (FlushNotAllowedEngineException e) {
                // ignore
            } catch (Throwable e) {
                logger.warn("failed to flush after setting shard to inactive", e);
            }
        } else {
            logger.debug("updating index_buffer_size from [{}] to [{}]", preValue, indexingBufferSize);
        }
    }
}
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
@Override
public void updateIndexingBufferSize(ByteSizeValue indexingBufferSize) {
    ByteSizeValue preValue = this.indexingBufferSize;
    rwl.readLock().lock();
    try {
        // LUCENE MONITOR - If this restriction is removed from Lucene, remove it from here
        if (indexingBufferSize.mbFrac() > 2048.0) {
            this.indexingBufferSize = new ByteSizeValue(2048, ByteSizeUnit.MB);
        } else {
            this.indexingBufferSize = indexingBufferSize;
        }
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter != null) {
            indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac());
        }
    } finally {
        rwl.readLock().unlock();
    }
    // it's inactive, make sure we do a full flush in this case, since the memory
    // changes only after a "data" change has happened to the writer
    if (indexingBufferSize == Engine.INACTIVE_SHARD_INDEXING_BUFFER
            && preValue != Engine.INACTIVE_SHARD_INDEXING_BUFFER) {
        try {
            flush(new Flush().full(true));
        } catch (Exception e) {
            logger.warn("failed to flush after setting shard to inactive", e);
        }
    }
}
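The three engine implementations above all exercise the same property of getConfig(): the RAM buffer size is one of the few settings that takes effect on a live writer, with no reopen required. A condensed sketch of that shared pattern, stripped of the Elasticsearch locking and flush logic (the 2048 MB clamp mirrors the Lucene restriction RobinEngine guards against):

/**
 * Sketch only: apply a new indexing RAM buffer size to an already-open writer.
 * The clamp mirrors RobinEngine's guard against Lucene's 2048 MB buffer limit.
 */
static void resizeIndexingBuffer(IndexWriter writer, double megabytes) {
    double clamped = Math.min(megabytes, 2048.0);
    // LiveIndexWriterConfig applies this immediately to the running writer
    writer.getConfig().setRAMBufferSizeMB(clamped);
}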
From source file:org.elasticsearch.index.merge.Merges.java
License:Apache License
/**
 * See {@link org.apache.lucene.index.IndexWriter#maybeMerge()}, with the additional
 * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
 */
public static void maybeMerge(IndexWriter writer) throws IOException {
    MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
    if (mergeScheduler instanceof EnableMergeScheduler) {
        ((EnableMergeScheduler) mergeScheduler).enableMerge();
        try {
            writer.maybeMerge();
        } finally {
            ((EnableMergeScheduler) mergeScheduler).disableMerge();
        }
    } else {
        writer.maybeMerge();
    }
}
From source file:org.elasticsearch.index.merge.Merges.java
License:Apache License
/**
 * See {@link org.apache.lucene.index.IndexWriter#forceMerge(int, boolean)}, with the additional
 * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
 */
public static void forceMerge(IndexWriter writer, int maxNumSegments, boolean doWait) throws IOException {
    MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
    if (mergeScheduler instanceof EnableMergeScheduler) {
        ((EnableMergeScheduler) mergeScheduler).enableMerge();
        try {
            writer.forceMerge(maxNumSegments, doWait);
        } finally {
            ((EnableMergeScheduler) mergeScheduler).disableMerge();
        }
    } else {
        writer.forceMerge(maxNumSegments, doWait);
    }
}
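A hedged usage sketch for these two helpers: both calls below behave the same whether or not the writer was opened with Elasticsearch's EnableMergeScheduler, since the helpers fall back to invoking the IndexWriter directly.

// opportunistically merge if the merge policy finds pending work
Merges.maybeMerge(writer);

// force-merge down to a single segment and block until it completes
Merges.forceMerge(writer, 1, true);

Reading the scheduler through writer.getConfig().getMergeScheduler() is what lets these static utilities toggle merging without holding a reference to the owning engine.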