List of usage examples for the org.apache.lucene.index.IndexWriterConfig#setCodec method:
public IndexWriterConfig setCodec(Codec codec)
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {//from w ww . java 2 s . c om boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setInfoStream(new LoggerInfoStream(indexSettings, shardId)); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); config.setCheckIntegrityAtMerge(checksumOnMerge); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); if (warmer != null) { final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {/*from w w w . ja v a2 s .com*/ // release locks when started if (IndexWriter.isLocked(store.directory())) { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new IndexUpgraderMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); if (warmer != null) warmer.warm(context); } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.InternalEngine.java
License:Apache License
private IndexWriter createWriter(boolean create) throws IOException { try {/*ww w. jav a 2s. c o m*/ final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); iwc.setCommitOnClose(false); // we by default don't commit on close iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); iwc.setIndexDeletionPolicy(deletionPolicy); // with tests.verbose, lucene sets this up: plumb to align with filesystem stream boolean verbose = false; try { verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); } catch (Throwable ignore) { } iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); iwc.setMergeScheduler(mergeScheduler); MergePolicy mergePolicy = config().getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); iwc.setMergePolicy(mergePolicy); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac()); iwc.setCodec(engineConfig.getCodec()); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ iwc.setWriteLockTimeout(5000); iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush()); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh iwc.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(LeafReader reader) throws IOException { try { LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId); assert isMergedSegment(esLeafReader); if (warmer != null) { final Engine.Searcher searcher = new Searcher("warmer", searcherFactory.newSearcher(esLeafReader, null)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (isClosed.get() == false) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), iwc); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTest.java
License:Apache License
/**
 * Indexes the given terms/surfaces/weights into an in-memory index using the
 * completion mapper's postings format, force-merges down to one segment, and
 * returns the suggester {@link Lookup} read back from that segment.
 *
 * <p>Fix: the anonymous {@code FilterCodec} method now carries
 * {@code @Override} so the compiler verifies it actually overrides
 * {@code postingsFormat()} (the sibling test class already does this).
 *
 * @param mapper   completion field mapper under test; supplies analyzer,
 *                 postings format, payload encoding and field construction
 * @param terms    input terms, one document each
 * @param surfaces surface forms stored in the payloads (parallel to terms)
 * @param weights  suggestion weights (parallel to terms)
 * @return the lookup built from the merged segment
 * @throws IOException on indexing failure
 */
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces,
        long[] weights) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    // Delegate everything to the default codec except the postings format,
    // which comes from the completion mapper under test.
    FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) {
        @Override
        public PostingsFormat postingsFormat() {
            return mapper.postingsFormatProvider().get();
        }
    };
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mapper.indexAnalyzer());
    indexWriterConfig.setCodec(filterCodec);
    IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < weights.length; i++) {
        Document doc = new Document();
        BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i],
                new BytesRef(Long.toString(weights[i])));
        doc.add(mapper.getCompletionField(terms[i], payload));
        if (randomBoolean()) {
            // Randomly commit mid-stream so multiple segments get merged.
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, true);
    // After forceMerge(1) all documents must live in a single leaf.
    assertThat(reader.leaves().size(), equalTo(1));
    assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
    AtomicReaderContext atomicReaderContext = reader.leaves().get(0);
    Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
    Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper,
            new CompletionSuggestionContext(null));
    reader.close();
    writer.close();
    dir.close();
    return lookup;
}
From source file:org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTests.java
License:Apache License
/**
 * Indexes the given terms/surfaces/weights into an in-memory index using the
 * completion field type's postings format, force-merges down to one segment,
 * and returns the suggester {@link Lookup} read back from that segment.
 *
 * @param mapper   completion field mapper under test; supplies analyzer,
 *                 postings format, payload encoding and field construction
 * @param terms    input terms, one document each
 * @param surfaces surface forms stored in the payloads (parallel to terms)
 * @param weights  suggestion weights (parallel to terms)
 * @return the lookup built from the merged segment
 * @throws IOException on indexing failure
 */
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces,
        long[] weights) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    // Standard codec, except that the field under test gets the mapper's
    // completion postings format wrapped around the default one.
    Codec wrappedCodec = new Lucene54Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            final PostingsFormat in = super.getPostingsFormatForField(field);
            return mapper.fieldType().postingsFormat(in);
        }
    };
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.fieldType().indexAnalyzer());
    indexWriterConfig.setCodec(wrappedCodec);
    IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < weights.length; i++) {
        BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i],
                new BytesRef(Long.toString(weights[i])));
        Document doc = new Document();
        doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload));
        // Randomly commit mid-stream so multiple segments get merged.
        if (randomBoolean()) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1, true);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, true);
    // After forceMerge(1, true) all documents must live in a single leaf.
    assertThat(reader.leaves().size(), equalTo(1));
    assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
    LeafReaderContext leafContext = reader.leaves().get(0);
    Terms luceneTerms = leafContext.reader().terms(mapper.fieldType().names().fullName());
    Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper.fieldType(),
            new CompletionSuggestionContext(null));
    reader.close();
    writer.close();
    dir.close();
    return lookup;
}
From source file:org.elasticsearch.search.suggest.completion.old.CompletionPostingsFormatTest.java
License:Apache License
public Lookup buildAnalyzingLookup(final OldCompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights) throws IOException { RAMDirectory dir = new RAMDirectory(); Codec codec = new Lucene50Codec() { public PostingsFormat getPostingsFormatForField(String field) { final PostingsFormat in = super.getPostingsFormatForField(field); return mapper.fieldType().postingsFormat(in); }//from w w w. j ava2s. co m }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.fieldType().indexAnalyzer()); indexWriterConfig.setCodec(codec); IndexWriter writer = new IndexWriter(dir, indexWriterConfig); for (int i = 0; i < weights.length; i++) { Document doc = new Document(); BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i]))); doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload)); if (randomBoolean()) { writer.commit(); } writer.addDocument(doc); } writer.commit(); writer.forceMerge(1, true); writer.commit(); DirectoryReader reader = DirectoryReader.open(writer, true); assertThat(reader.leaves().size(), equalTo(1)); assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length)); LeafReaderContext atomicReaderContext = reader.leaves().get(0); Terms luceneTerms = atomicReaderContext.reader().terms(mapper.fieldType().names().fullName()); Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper.fieldType(), new CompletionSuggestionContext(null)); reader.close(); writer.close(); dir.close(); return lookup; }
From source file:org.elasticsearch.test.integration.search.suggest.CompletionPostingsFormatTest.java
License:Apache License
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights) throws IOException { RAMDirectory dir = new RAMDirectory(); FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) { public PostingsFormat postingsFormat() { return mapper.postingsFormatProvider().get(); }//from w ww . ja v a 2 s. co m }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mapper.indexAnalyzer()); indexWriterConfig.setCodec(filterCodec); IndexWriter writer = new IndexWriter(dir, indexWriterConfig); for (int i = 0; i < weights.length; i++) { Document doc = new Document(); BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i]))); doc.add(mapper.getCompletionField(terms[i], payload)); if (randomBoolean()) { writer.commit(); } writer.addDocument(doc); } writer.commit(); writer.forceMerge(1); writer.commit(); DirectoryReader reader = DirectoryReader.open(writer, true); assertThat(reader.leaves().size(), equalTo(1)); assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length)); AtomicReaderContext atomicReaderContext = reader.leaves().get(0); Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name()); Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper, false); reader.close(); writer.close(); dir.close(); return lookup; }
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.indexing.parallel.ParallelIndexer.java
License:Open Source License
public void run() { IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new StandardAnalyzer(LuceneUtils.LUCENE_VERSION)); config.setOpenMode(openMode);// www . j a va 2 s.c om config.setCodec(new LireCustomCodec()); try { if (imageDirectory != null) System.out.println("Getting all images in " + imageDirectory + "."); writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config); if (imageList == null) { files = FileUtils.getAllImages(new File(imageDirectory), true); } else { files = new LinkedList<String>(); BufferedReader br = new BufferedReader(new FileReader(imageList)); String line = null; while ((line = br.readLine()) != null) { if (line.trim().length() > 3) files.add(line.trim()); } } numImages = files.size(); System.out.println("Indexing " + files.size() + " images."); Thread p = new Thread(new Producer()); p.start(); LinkedList<Thread> threads = new LinkedList<Thread>(); long l = System.currentTimeMillis(); for (int i = 0; i < numberOfThreads; i++) { Thread c = new Thread(new Consumer()); c.start(); threads.add(c); } Thread m = new Thread(new Monitoring()); m.start(); for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext();) { iterator.next().join(); } long l1 = System.currentTimeMillis() - l; System.out.println("Analyzed " + overallCount + " images in " + l1 / 1000 + " seconds, ~" + ((overallCount > 0) ? (l1 / overallCount) : "n.a.") + " ms each."); writer.commit(); writer.close(); threadFinished = true; } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
/** * Creates an IndexWriter for given index path, with given analyzer. * * @param directory the path to the index directory * @param create set to true if you want to create a new index * @param analyzer gives the analyzer used for the Indexwriter. * @return an IndexWriter/* w w w .j ava 2 s. co m*/ * @throws IOException */ public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION); // LetterTokenizer with LowerCaseFilter else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION); // WhitespaceTokenizer else if (analyzer == AnalyzerType.KeywordAnalyzer) tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token. // The config IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null;/*from w w w .j ava 2s. c om*/ if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION); // The config IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }