List of usage examples for org.apache.lucene.index.IndexWriterConfig#setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:org.elasticsearch.action.termvector.TermVectorUnitTests.java
License:Apache License
/**
 * Indexes one document that contains only a stored {@code id} field, reads the term vectors
 * of that document back and hands them to {@code outResponse}.
 *
 * @param outResponse response that receives the fields and is flagged as existing
 * @throws IOException if the index cannot be written or read
 */
private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
    // try-with-resources releases the directory, writer and reader even when indexing,
    // searching or the scoreDocs[0] access below throws.
    try (Directory dir = newDirectory()) {
        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT,
                new StandardAnalyzer(TEST_VERSION_CURRENT));
        conf.setOpenMode(OpenMode.CREATE);

        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // Only the id field is added; no field configured with term vectors is indexed.
            Document d = new Document();
            d.add(new Field("id", "abc", StringField.TYPE_STORED));
            writer.updateDocument(new Term("id", "abc"), d);
            writer.commit();
        }

        try (DirectoryReader dr = DirectoryReader.open(dir)) {
            IndexSearcher s = new IndexSearcher(dr);
            TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
            ScoreDoc[] scoreDocs = search.scoreDocs;
            int doc = scoreDocs[0].doc;
            Fields fields = dr.getTermVectors(doc);
            EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
            // The same Fields instance is passed for both Fields arguments, as before.
            outResponse.setFields(fields, null, flags, fields);
            outResponse.setExists(true);
        }
    }
}
From source file:org.elasticsearch.action.termvector.TermVectorUnitTests.java
License:Apache License
private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException { Directory dir = newDirectory();//from w w w . j a va2s . c om IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)); conf.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, conf); FieldType type = new FieldType(TextField.TYPE_STORED); type.setStoreTermVectorOffsets(true); type.setStoreTermVectorPayloads(false); type.setStoreTermVectorPositions(true); type.setStoreTermVectors(true); type.freeze(); Document d = new Document(); d.add(new Field("id", "abc", StringField.TYPE_STORED)); d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type)); d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type)); writer.updateDocument(new Term("id", "abc"), d); writer.commit(); writer.close(); DirectoryReader dr = DirectoryReader.open(dir); IndexSearcher s = new IndexSearcher(dr); TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1); ScoreDoc[] scoreDocs = search.scoreDocs; int doc = scoreDocs[0].doc; Fields termVectors = dr.getTermVectors(doc); EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets); outResponse.setFields(termVectors, null, flags, termVectors); dr.close(); dir.close(); }
From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTestCase.java
License:Apache License
/**
 * Writes the given test documents into a fresh in-memory Lucene index and opens a reader
 * on it.
 *
 * <p>Fields of the first document whose setting has {@code storedPayloads} enabled are
 * analyzed with a tokenizer / lower-case / type-as-payload chain; every other field goes
 * through a {@link StandardAnalyzer} built with an empty stop-word set. Each document is
 * written with {@code updateDocument} keyed on its id and committed individually.
 *
 * @param testDocs documents to index; the field settings of the first element select the
 *                 per-field analyzers
 * @return a reader opened on the directory the documents were written to
 * @throws IOException if writing or opening the index fails
 */
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    Map<String, Analyzer> payloadAnalyzers = new HashMap<>();
    for (TestFieldSetting setting : testDocs[0].fieldSettings) {
        if (!setting.storedPayloads) {
            continue;
        }
        payloadAnalyzers.put(setting.name, new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer source = new StandardTokenizer();
                TokenFilter chain = new TypeAsPayloadTokenFilter(new LowerCaseFilter(source));
                return new TokenStreamComponents(source, chain);
            }
        });
    }

    Analyzer defaultAnalyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, payloadAnalyzers);

    Directory directory = new RAMDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(perFieldAnalyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);

    for (TestDoc testDoc : testDocs) {
        Document luceneDoc = new Document();
        luceneDoc.add(new Field("id", testDoc.id, StringField.TYPE_STORED));

        for (int fieldIdx = 0; fieldIdx < testDoc.fieldContent.length; fieldIdx++) {
            TestFieldSetting setting = testDoc.fieldSettings[fieldIdx];
            // Positions are switched on whenever positions, payloads or offsets are requested.
            boolean withPositions = setting.storedPositions || setting.storedPayloads || setting.storedOffset;

            FieldType vectorType = new FieldType(TextField.TYPE_STORED);
            vectorType.setStoreTermVectorOffsets(setting.storedOffset);
            vectorType.setStoreTermVectorPayloads(setting.storedPayloads);
            vectorType.setStoreTermVectorPositions(withPositions);
            vectorType.setStoreTermVectors(true);
            vectorType.freeze();

            luceneDoc.add(new Field(setting.name, testDoc.fieldContent[fieldIdx], vectorType));
        }

        indexWriter.updateDocument(new Term("id", testDoc.id), luceneDoc);
        indexWriter.commit();
    }

    indexWriter.close();
    return DirectoryReader.open(directory);
}
From source file:org.elasticsearch.action.termvectors.TermVectorsUnitTests.java
License:Apache License
/**
 * Indexes one document that contains only a stored {@code id} field, reads the term vectors
 * of that document back and hands them to {@code outResponse}.
 *
 * @param outResponse response that receives the fields and is flagged as existing
 * @throws IOException if the index cannot be written or read
 */
private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
    // try-with-resources releases the directory, writer and reader even when indexing,
    // searching or the scoreDocs[0] access below throws.
    try (Directory dir = newDirectory()) {
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        conf.setOpenMode(OpenMode.CREATE);

        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // Only the id field is added; no field configured with term vectors is indexed.
            Document d = new Document();
            d.add(new Field("id", "abc", StringField.TYPE_STORED));
            writer.updateDocument(new Term("id", "abc"), d);
            writer.commit();
        }

        try (DirectoryReader dr = DirectoryReader.open(dir)) {
            IndexSearcher s = new IndexSearcher(dr);
            TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
            ScoreDoc[] scoreDocs = search.scoreDocs;
            int doc = scoreDocs[0].doc;
            Fields fields = dr.getTermVectors(doc);
            EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
            // The same Fields instance is passed for both Fields arguments, as before.
            outResponse.setFields(fields, null, flags, fields);
            outResponse.setExists(true);
        }
    }
}
From source file:org.elasticsearch.action.termvectors.TermVectorsUnitTests.java
License:Apache License
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException { Directory dir = newDirectory();//from w w w . j av a2s .co m IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer()); conf.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, conf); FieldType type = new FieldType(TextField.TYPE_STORED); type.setStoreTermVectorOffsets(true); type.setStoreTermVectorPayloads(false); type.setStoreTermVectorPositions(true); type.setStoreTermVectors(true); type.freeze(); Document d = new Document(); d.add(new Field("id", "abc", StringField.TYPE_STORED)); d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type)); d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type)); writer.updateDocument(new Term("id", "abc"), d); writer.commit(); writer.close(); DirectoryReader dr = DirectoryReader.open(dir); IndexSearcher s = new IndexSearcher(dr); TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1); ScoreDoc[] scoreDocs = search.scoreDocs; int doc = scoreDocs[0].doc; Fields termVectors = dr.getTermVectors(doc); EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets); outResponse.setFields(termVectors, null, flags, termVectors); dr.close(); dir.close(); }
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {//from ww w . ja va2 s .c o m boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setInfoStream(new LoggerInfoStream(indexSettings, shardId)); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); config.setCheckIntegrityAtMerge(checksumOnMerge); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); if (warmer != null) { final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {/* w ww .jav a 2s. c o m*/ // release locks when started if (IndexWriter.isLocked(store.directory())) { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new IndexUpgraderMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); if (warmer != null) warmer.warm(context); } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.InternalEngine.java
License:Apache License
private IndexWriter createWriter(boolean create) throws IOException { try {/* ww w . java 2 s.c o m*/ final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); iwc.setCommitOnClose(false); // we by default don't commit on close iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); iwc.setIndexDeletionPolicy(deletionPolicy); // with tests.verbose, lucene sets this up: plumb to align with filesystem stream boolean verbose = false; try { verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); } catch (Throwable ignore) { } iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); iwc.setMergeScheduler(mergeScheduler); MergePolicy mergePolicy = config().getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); iwc.setMergePolicy(mergePolicy); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac()); iwc.setCodec(engineConfig.getCodec()); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ iwc.setWriteLockTimeout(5000); iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush()); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh iwc.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(LeafReader reader) throws IOException { try { LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId); assert isMergedSegment(esLeafReader); if (warmer != null) { final Engine.Searcher searcher = new Searcher("warmer", searcherFactory.newSearcher(esLeafReader, null)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (isClosed.get() == false) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), iwc); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { IndexWriter indexWriter = null;/*from w w w. j a v a2 s.c om*/ try { // release locks when started if (IndexWriter.isLocked(store.directory())) { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } boolean create = !IndexReader.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); config.setMergePolicy(mergePolicyProvider.newMergePolicy()); config.setSimilarity(similarityService.defaultIndexSimilarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setTermIndexInterval(termIndexInterval); config.setReaderTermsIndexDivisor(termIndexDivisor); config.setMaxThreadStates(indexConcurrency); indexWriter = new IndexWriter(store.directory(), config); } catch (IOException e) { safeClose(indexWriter); throw e; } return indexWriter; }
From source file:org.elasticsearch.index.store.StoreTest.java
License:Apache License
/**
 * Exercises {@code Store.MetadataSnapshot.recoveryDiff}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Builds between 2 and 101 random documents and writes them, with the same seed, into
 *       two separate stores; the first store is closed after its snapshot is taken, and a
 *       sleep loop between the two builds waits for the wall-clock millisecond to change.</li>
 *   <li>Asserts both snapshots have the same size, that every file of the first snapshot
 *       exists in the second, and that {@code isSame} is false exactly for names ending in
 *       {@code .si}; then asserts the sizes of the different / identical / missing lists.</li>
 *   <li>Asserts that a snapshot diffed against itself is entirely identical.</li>
 *   <li>Reopens the second store in APPEND mode, deletes one random id, and asserts the
 *       diff against the earlier snapshot, branching on whether a {@code .del} file is
 *       present in the new metadata.</li>
 *   <li>Adds one more commit with compound files forced on and asserts the diff against
 *       the post-delete snapshot, again branching on the {@code .del} file.</li>
 *   <li>Deletes the directory content and closes the store.</li>
 * </ol>
 *
 * <p>NOTE(review): both index builds consume the seeded {@code Random} in the same order;
 * presumably the assertions rely on that, so statement order should not be changed without
 * confirming.
 *
 * @throws IOException if writing to or reading from a store fails
 * @throws InterruptedException if the sleep used to bump the clock is interrupted
 */
@Test public void testRecoveryDiff() throws IOException, InterruptedException { int numDocs = 2 + random().nextInt(100); List<Document> docs = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); docs.add(doc);// w ww. j a v a 2 s . c o m } long seed = random().nextLong(); Store.MetadataSnapshot first; { Random random = new Random(seed); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setCodec(actualDefaultCodec()); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setUseCompoundFile(random.nextBoolean()); iwc.setMaxThreadStates(1); final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random); Store store = new Store(shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(random, directoryService), new DummyShardLock(shardId)); IndexWriter writer = new IndexWriter(store.directory(), iwc); final boolean lotsOfSegments = rarely(random); for (Document d : docs) { writer.addDocument(d); if (lotsOfSegments && random.nextBoolean()) { writer.commit(); } else if (rarely(random)) { writer.commit(); } } writer.close(); first = store.getMetadata(); assertDeleteContent(store, directoryService); store.close(); } long time = new Date().getTime(); while (time == new Date().getTime()) { Thread.sleep(10); // bump the time } Store.MetadataSnapshot second; Store store; { Random random = new Random(seed); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setCodec(actualDefaultCodec()); iwc.setMergePolicy(NoMergePolicy.INSTANCE); 
iwc.setUseCompoundFile(random.nextBoolean()); iwc.setMaxThreadStates(1); final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random); store = new Store(shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(random, directoryService), new DummyShardLock(shardId)); IndexWriter writer = new IndexWriter(store.directory(), iwc); final boolean lotsOfSegments = rarely(random); for (Document d : docs) { writer.addDocument(d); if (lotsOfSegments && random.nextBoolean()) { writer.commit(); } else if (rarely(random)) { writer.commit(); } } writer.close(); second = store.getMetadata(); } Store.RecoveryDiff diff = first.recoveryDiff(second); assertThat(first.size(), equalTo(second.size())); for (StoreFileMetaData md : first) { assertThat(second.get(md.name()), notNullValue()); // si files are different - containing timestamps etc assertThat(second.get(md.name()).isSame(md), equalTo(md.name().endsWith(".si") == false)); } assertThat(diff.different.size(), equalTo(first.size() - 1)); assertThat(diff.identical.size(), equalTo(1)); // commit point is identical assertThat(diff.missing, empty()); // check the self diff Store.RecoveryDiff selfDiff = first.recoveryDiff(first); assertThat(selfDiff.identical.size(), equalTo(first.size())); assertThat(selfDiff.different, empty()); assertThat(selfDiff.missing, empty()); // lets add some deletes Random random = new Random(seed); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setCodec(actualDefaultCodec()); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setUseCompoundFile(random.nextBoolean()); iwc.setMaxThreadStates(1); iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); IndexWriter writer = new IndexWriter(store.directory(), iwc); writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs)))); writer.close(); Store.MetadataSnapshot metadata = store.getMetadata(); 
StoreFileMetaData delFile = null; for (StoreFileMetaData md : metadata) { if (md.name().endsWith(".del")) { delFile = md; break; } } Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second); if (delFile != null) { assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file assertThat(afterDeleteDiff.different.size(), equalTo(0)); assertThat(afterDeleteDiff.missing.size(), equalTo(2)); } else { // an entire segment must be missing (single doc segment got dropped) assertThat(afterDeleteDiff.identical.size(), greaterThan(0)); assertThat(afterDeleteDiff.different.size(), equalTo(0)); assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different } // check the self diff selfDiff = metadata.recoveryDiff(metadata); assertThat(selfDiff.identical.size(), equalTo(metadata.size())); assertThat(selfDiff.different, empty()); assertThat(selfDiff.missing, empty()); // add a new commit iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodec(actualDefaultCodec()); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files iwc.setMaxThreadStates(1); iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); writer = new IndexWriter(store.directory(), iwc); writer.addDocument(docs.get(0)); writer.close(); Store.MetadataSnapshot newCommitMetaData = store.getMetadata(); Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata); if (delFile != null) { assertThat(newCommitDiff.identical.size(), equalTo(newCommitMetaData.size() - 5)); // segments_N, del file, cfs, cfe, si for the new segment assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different assertThat(newCommitDiff.different.get(0).name(), endsWith(".del")); assertThat(newCommitDiff.missing.size(), equalTo(4)); // segments_N,cfs, cfe, si for the new segment } else { 
assertThat(newCommitDiff.identical.size(), equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment assertThat(newCommitDiff.different.size(), equalTo(0)); assertThat(newCommitDiff.missing.size(), equalTo(4)); // an entire segment must be missing (single doc segment got dropped) plus the commit is different } deleteContent(store.directory()); IOUtils.close(store); }