List of usage examples for org.apache.lucene.index IndexWriter deleteDocuments
public long deleteDocuments(Term... terms) throws IOException
public long deleteDocuments(Query... queries) throws IOException

Note: every scraped example below calls the Term-based overload. In the older Lucene releases these examples target, both methods are declared void; the long return value (a sequence number) was added in later Lucene versions.
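For orientation before the scraped sources, here is a minimal self-contained sketch exercising both overloads. It is not taken from any of the source files below: the class name DeleteDocumentsSketch and the "id" field are illustrative, and it assumes a recent Lucene release (8.x or later), where ByteBuffersDirectory replaces the RAMDirectory seen in the older examples.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class DeleteDocumentsSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // Index three documents keyed by an (illustrative) "id" field.
            for (String id : new String[] { "1", "2", "3" }) {
                Document doc = new Document();
                doc.add(new StringField("id", id, Field.Store.YES));
                writer.addDocument(doc);
            }
            writer.commit();

            // Term-based overload: deletes every document whose "id" field holds the exact term "1".
            writer.deleteDocuments(new Term("id", "1"));

            // Query-based overload: deletes every document matching the query.
            writer.deleteDocuments(new TermQuery(new Term("id", "2")));

            // Deletes are buffered; they become visible to newly opened readers after a commit.
            writer.commit();

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                System.out.println(reader.numDocs()); // prints 1
            }
        }
    }
}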
From source file:org.elasticsearch.common.lucene.IndexWritersTests.java
License:Apache License
@Test
public void testEstimateSize() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true,
            IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.commit();
    assertThat("Index is empty after creation and commit", estimateRamSize(indexWriter), equalTo(0L));

    indexWriter.addDocument(doc().add(field("_id", "1"))
            .add(new NumericField("test", Field.Store.YES, true).setIntValue(2)).build());
    long size = estimateRamSize(indexWriter);
    assertThat("After indexing a small document, should be higher", size, greaterThan(100000L));

    // the buffered delete term adds to the writer's RAM usage until it is flushed
    indexWriter.deleteDocuments(new Term("_id", "1"));
    assertThat(estimateRamSize(indexWriter), greaterThan(size));

    indexWriter.commit();
    assertThat(estimateRamSize(indexWriter), equalTo(0L));
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testCleanIndex() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);

    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    try (DirectoryReader open = DirectoryReader.open(writer, true)) {
        assertEquals(3, open.numDocs());
        assertEquals(1, open.numDeletedDocs());
        assertEquals(4, open.maxDoc());
    }
    writer.close();

    if (random().nextBoolean()) {
        for (String file : dir.listAll()) {
            if (file.startsWith("_1")) {
                // delete a random file
                dir.deleteFile(file);
                break;
            }
        }
    }
    Lucene.cleanLuceneIndex(dir);
    if (dir.listAll().length > 0) {
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                assertEquals(file, "write.lock");
            }
        }
    }
    dir.close();
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testPruneUnreferencedFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);

    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);

    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    DirectoryReader open = DirectoryReader.open(writer, true);
    assertEquals(3, open.numDocs());
    assertEquals(1, open.numDeletedDocs());
    assertEquals(4, open.maxDoc());
    open.close();
    writer.close();

    SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
    assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName());

    open = DirectoryReader.open(dir);
    assertEquals(3, open.numDocs());
    assertEquals(0, open.numDeletedDocs());
    assertEquals(3, open.maxDoc());

    IndexSearcher s = new IndexSearcher(open);
    assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0);

    for (String file : dir.listAll()) {
        assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
    }
    open.close();
    dir.close();
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testNumDocs() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    IndexWriter writer = new IndexWriter(dir, iwc);

    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    // docs 2 and 3 are not committed yet, so the last commit point still counts a single doc
    assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(3, Lucene.getNumDocs(segmentCommitInfos));

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(2, Lucene.getNumDocs(segmentCommitInfos));

    int numDocsToIndex = randomIntBetween(10, 50);
    List<Term> deleteTerms = new ArrayList<>();
    for (int i = 0; i < numDocsToIndex; i++) {
        doc = new Document();
        doc.add(new TextField("id", "extra_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        deleteTerms.add(new Term("id", "extra_" + i));
        writer.addDocument(doc);
    }
    int numDocsToDelete = randomIntBetween(0, numDocsToIndex);
    Collections.shuffle(deleteTerms, random());
    for (int i = 0; i < numDocsToDelete; i++) {
        Term remove = deleteTerms.remove(0);
        writer.deleteDocuments(remove);
    }
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(2 + deleteTerms.size(), Lucene.getNumDocs(segmentCommitInfos));
    writer.close();
    dir.close();
}
From source file:org.elasticsearch.common.lucene.uid.UidFieldTests.java
License:Apache License
@Test
public void testUidField() throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    IndexReader reader = IndexReader.open(writer, true);
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-1L));

    Document doc = new Document();
    doc.add(new Field("_uid", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-2L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(-2L));

    doc = new Document();
    doc.add(new UidField("_uid", "1", 1));
    writer.updateDocument(new Term("_uid", "1"), doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(1L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(1L));

    doc = new Document();
    UidField uid = new UidField("_uid", "1", 2);
    doc.add(uid);
    writer.updateDocument(new Term("_uid", "1"), doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(2L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(2L));

    // test reuse of uid field
    doc = new Document();
    uid.version(3);
    doc.add(uid);
    writer.updateDocument(new Term("_uid", "1"), doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(3L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(3L));

    writer.deleteDocuments(new Term("_uid", "1"));
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-1L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")), nullValue());
}
From source file:org.elasticsearch.common.lucene.uid.VersionsTests.java
License:Apache License
@Test
public void testVersions() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    MatcherAssert.assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_FOUND));

    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE));
    writer.addDocument(doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_SET));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(Versions.NOT_SET));

    doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 1));
    writer.updateDocument(new Term(UidFieldMapper.NAME, "1"), doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(1L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(1L));

    doc = new Document();
    Field uid = new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE);
    Field version = new NumericDocValuesField(VersionFieldMapper.NAME, 2);
    doc.add(uid);
    doc.add(version);
    writer.updateDocument(new Term(UidFieldMapper.NAME, "1"), doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(2L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(2L));

    // test reuse of uid field
    doc = new Document();
    version.setLongValue(3);
    doc.add(uid);
    doc.add(version);
    writer.updateDocument(new Term(UidFieldMapper.NAME, "1"), doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(3L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(3L));

    writer.deleteDocuments(new Term(UidFieldMapper.NAME, "1"));
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_FOUND));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), nullValue());
    directoryReader.close();
    writer.close();
    dir.close();
}
From source file:org.elasticsearch.common.lucene.uid.VersionsTests.java
License:Apache License
@Test
public void testNestedDocuments() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    List<Document> docs = new ArrayList<Document>();
    for (int i = 0; i < 4; ++i) {
        // Nested
        Document doc = new Document();
        doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
        docs.add(doc);
    }
    // Root
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE));
    NumericDocValuesField version = new NumericDocValuesField(VersionFieldMapper.NAME, 5L);
    doc.add(version);
    docs.add(doc);

    writer.updateDocuments(new Term(UidFieldMapper.NAME, "1"), docs);
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(5L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(5L));

    version.setLongValue(6L);
    writer.updateDocuments(new Term(UidFieldMapper.NAME, "1"), docs);
    version.setLongValue(7L);
    writer.updateDocuments(new Term(UidFieldMapper.NAME, "1"), docs);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(7L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(7L));

    writer.deleteDocuments(new Term(UidFieldMapper.NAME, "1"));
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_FOUND));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), nullValue());
    directoryReader.close();
    writer.close();
    dir.close();
}
From source file:org.elasticsearch.index.cache.filter.FilterCacheTests.java
License:Apache License
private void verifyCache(FilterCache filterCache) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter, true);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        document.add(new TextField("id", Integer.toString(i), Field.Store.YES));
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);
    IndexSearcher searcher = new IndexSearcher(reader);
    assertThat(Lucene.count(searcher,
            new ConstantScoreQuery(filterCache.cache(new TermFilter(new Term("id", "1"))))), equalTo(1L));
    assertThat(Lucene.count(searcher, new XFilteredQuery(new MatchAllDocsQuery(),
            filterCache.cache(new TermFilter(new Term("id", "1"))))), equalTo(1L));

    indexWriter.deleteDocuments(new Term("id", "1"));
    reader = refreshReader(reader);
    searcher = new IndexSearcher(reader);
    TermFilter filter = new TermFilter(new Term("id", "1"));
    Filter cachedFilter = filterCache.cache(filter);
    long constantScoreCount = filter == cachedFilter ? 0 : 1;
    // sadly, when caching based on cacheKey with NRT, this fails, that's why we have DeletionAware one
    assertThat(Lucene.count(searcher, new ConstantScoreQuery(cachedFilter)), equalTo(constantScoreCount));
    assertThat(Lucene.count(searcher, new XConstantScoreQuery(cachedFilter)), equalTo(0L));
    assertThat(Lucene.count(searcher, new XFilteredQuery(new MatchAllDocsQuery(), cachedFilter)), equalTo(0L));
    indexWriter.close();
}
From source file:org.elasticsearch.index.cache.id.SimpleIdCacheTests.java
License:Apache License
@Test
public void testDeletedDocuments() throws Exception {
    SimpleIdCache idCache = createSimpleIdCache(Tuple.tuple("child", "parent"));
    IndexWriter writer = createIndexWriter();
    // Begins with parent, ends with child docs
    final Document parent = doc("parent", "1");
    writer.addDocument(parent);
    writer.addDocument(childDoc("child", "1", "parent", "1"));
    writer.addDocument(childDoc("child", "2", "parent", "1"));
    writer.addDocument(childDoc("child", "3", "parent", "1"));
    writer.commit();

    final String parentUid = parent.get("_uid");
    assert parentUid != null;
    writer.deleteDocuments(new Term("_uid", parentUid));
    writer.close();

    DirectoryReader topLevelReader = DirectoryReader.open(writer.getDirectory());
    List<AtomicReaderContext> leaves = topLevelReader.getContext().leaves();
    idCache.refresh(leaves);

    assertThat(leaves.size(), equalTo(1));
    IdReaderCache readerCache = idCache.reader(leaves.get(0).reader());
    IdReaderTypeCache typeCache = readerCache.type("parent");
    assertThat(typeCache.idByDoc(0).toUtf8(), equalTo("1"));
}
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
private void innerDelete(Delete delete, IndexWriter writer) throws IOException {
    synchronized (dirtyLock(delete.uid())) {
        final long currentVersion;
        VersionValue versionValue = versionMap.getUnderLock(delete.uid().bytes());
        if (versionValue == null) {
            currentVersion = loadCurrentVersionFromIndex(delete.uid());
        } else {
            if (enableGcDeletes && versionValue.delete()
                    && (threadPool.estimatedTimeInMillis() - versionValue.time()) > gcDeletesInMillis) {
                currentVersion = Versions.NOT_FOUND; // deleted, and GC
            } else {
                currentVersion = versionValue.version();
            }
        }

        long updatedVersion;
        long expectedVersion = delete.version();
        if (delete.versionType().isVersionConflictForWrites(currentVersion, expectedVersion)) {
            if (delete.origin() == Operation.Origin.RECOVERY) {
                return;
            } else {
                throw new VersionConflictEngineException(shardId, delete.type(), delete.id(), currentVersion,
                        expectedVersion);
            }
        }
        updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion);

        final boolean found;
        if (currentVersion == Versions.NOT_FOUND) {
            // doc does not exist and no prior deletes
            found = false;
        } else if (versionValue != null && versionValue.delete()) {
            // a "delete on delete", in this case, we still increment the version, log it, and return that version
            found = false;
        } else {
            // we deleted a currently existing document
            writer.deleteDocuments(delete.uid());
            found = true;
        }

        delete.updateVersion(updatedVersion, found);
        Translog.Location translogLocation = translog.add(new Translog.Delete(delete));
        versionMap.putUnderLock(delete.uid().bytes(),
                new DeleteVersionValue(updatedVersion, threadPool.estimatedTimeInMillis(), translogLocation));
        indexingService.postDeleteUnderLock(delete);
    }
}