Example usage for org.apache.lucene.index IndexWriter deleteDocuments

List of usage examples for org.apache.lucene.index IndexWriter deleteDocuments

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter deleteDocuments.

Prototype

public long deleteDocuments(Query... queries) throws IOException 

Document

Deletes the document(s) matching any of the provided queries. A companion overload, deleteDocuments(Term... terms), deletes the document(s) containing any of the provided terms; most of the examples below use that overload.
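
Before the project examples below, here is a minimal, self-contained sketch of both overloads. It is not taken from the corpus on this page; it assumes a recent Lucene release matching the long-returning prototype above, and the class name and "id" field are illustrative:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;

public class DeleteDocumentsSketch {
    public static void deleteById(Directory dir) throws IOException {
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        Document doc = new Document();
        doc.add(new StringField("id", "1", Field.Store.NO));
        writer.addDocument(doc);
        writer.commit();

        // Overload used by most examples below: delete every document containing the term
        writer.deleteDocuments(new Term("id", "1"));

        // Overload from the prototype above: delete every document matching any of the queries
        writer.deleteDocuments(new TermQuery(new Term("id", "1")));

        // Deletes are buffered in the writer; new readers see them only after a
        // commit (or an NRT reader refresh).
        writer.commit();
        writer.close();
    }
}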

Usage

From source file:org.elasticsearch.common.lucene.IndexWritersTests.java

License:Apache License

@Test
public void testEstimateSize() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true,
            IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.commit();
    assertThat("Index is empty after creation and commit", estimateRamSize(indexWriter), equalTo(0l));

    indexWriter.addDocument(doc().add(field("_id", "1"))
            .add(new NumericField("test", Field.Store.YES, true).setIntValue(2)).build());

    long size = estimateRamSize(indexWriter);
    assertThat("After indexing a small document, should be higher", size, greaterThan(100000l));

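    // A buffered delete also consumes writer RAM until it is flushed, so the estimate grows.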
    indexWriter.deleteDocuments(new Term("_id", "1"));
    assertThat(estimateRamSize(indexWriter), greaterThan(size));

    indexWriter.commit();
    assertThat(estimateRamSize(indexWriter), equalTo(0L));
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testCleanIndex() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

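    // Delete by term; the deleted document stays in its segment (still counted in maxDoc) until a merge.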
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    try (DirectoryReader open = DirectoryReader.open(writer, true)) {
        assertEquals(3, open.numDocs());
        assertEquals(1, open.numDeletedDocs());
        assertEquals(4, open.maxDoc());
    }
    writer.close();
    if (random().nextBoolean()) {
        for (String file : dir.listAll()) {
            if (file.startsWith("_1")) {
                // delete a random file
                dir.deleteFile(file);
                break;
            }
        }
    }
    Lucene.cleanLuceneIndex(dir);
    if (dir.listAll().length > 0) {
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                assertEquals(file, "write.lock");
            }
        }
    }
    dir.close();
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testPruneUnreferencedFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);

    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    DirectoryReader open = DirectoryReader.open(writer, true);
    assertEquals(3, open.numDocs());
    assertEquals(1, open.numDeletedDocs());
    assertEquals(4, open.maxDoc());
    open.close();
    writer.close();
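    // Prune files that the earlier commit point does not reference, effectively rolling back to that commit.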
    SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
    assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName());
    open = DirectoryReader.open(dir);
    assertEquals(3, open.numDocs());
    assertEquals(0, open.numDeletedDocs());
    assertEquals(3, open.maxDoc());

    IndexSearcher s = new IndexSearcher(open);
    assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0);

    for (String file : dir.listAll()) {
        assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
    }
    open.close();
    dir.close();
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testNumDocs() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(3, Lucene.getNumDocs(segmentCommitInfos));
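    // A delete changes the committed doc count only after the next commit.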
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(2, Lucene.getNumDocs(segmentCommitInfos));

    int numDocsToIndex = randomIntBetween(10, 50);
    List<Term> deleteTerms = new ArrayList<>();
    for (int i = 0; i < numDocsToIndex; i++) {
        doc = new Document();
        doc.add(new TextField("id", "extra_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        deleteTerms.add(new Term("id", "extra_" + i));
        writer.addDocument(doc);
    }
    int numDocsToDelete = randomIntBetween(0, numDocsToIndex);
    Collections.shuffle(deleteTerms, random());
    for (int i = 0; i < numDocsToDelete; i++) {
        Term remove = deleteTerms.remove(0);
        writer.deleteDocuments(remove);
    }
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(2 + deleteTerms.size(), Lucene.getNumDocs(segmentCommitInfos));
    writer.close();
    dir.close();
}

From source file:org.elasticsearch.common.lucene.uid.UidFieldTests.java

License:Apache License

@Test
public void testUidField() throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    IndexReader reader = IndexReader.open(writer, true);
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-1L));

    Document doc = new Document();
    doc.add(new Field("_uid", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-2L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(-2L));

    doc = new Document();
    doc.add(new UidField("_uid", "1", 1));
    writer.updateDocument(new Term("_uid", "1"), doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(1L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(1L));

    doc = new Document();
    UidField uid = new UidField("_uid", "1", 2);
    doc.add(uid);
    writer.updateDocument(new Term("_uid", "1"), doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(2L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(2L));

    // test reuse of uid field
    doc = new Document();
    uid.version(3);
    doc.add(uid);
    writer.updateDocument(new Term("_uid", "1"), doc);
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(3L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")).version, equalTo(3L));

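    // Once the document is deleted, the uid lookup misses again.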
    writer.deleteDocuments(new Term("_uid", "1"));
    reader = reader.reopen();
    assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-1L));
    assertThat(UidField.loadDocIdAndVersion(reader, new Term("_uid", "1")), nullValue());
}

From source file:org.elasticsearch.common.lucene.uid.VersionsTests.java

License:Apache License

@Test
public void testVersions() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    MatcherAssert.assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_FOUND));

    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE));
    writer.addDocument(doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_SET));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(Versions.NOT_SET));

    doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 1));
    writer.updateDocument(new Term(UidFieldMapper.NAME, "1"), doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(1L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(1L));

    doc = new Document();
    Field uid = new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE);
    Field version = new NumericDocValuesField(VersionFieldMapper.NAME, 2);
    doc.add(uid);
    doc.add(version);
    writer.updateDocument(new Term(UidFieldMapper.NAME, "1"), doc);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(2L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(2L));

    // test reuse of uid field
    doc = new Document();
    version.setLongValue(3);
    doc.add(uid);
    doc.add(version);
    writer.updateDocument(new Term(UidFieldMapper.NAME, "1"), doc);

    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(3L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(3L));

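    // Once the document is deleted, the lookup reports NOT_FOUND / null again.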
    writer.deleteDocuments(new Term(UidFieldMapper.NAME, "1"));
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_FOUND));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), nullValue());
    directoryReader.close();
    writer.close();
    dir.close();
}

From source file:org.elasticsearch.common.lucene.uid.VersionsTests.java

License:Apache License

@Test
public void testNestedDocuments() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

    List<Document> docs = new ArrayList<Document>();
    for (int i = 0; i < 4; ++i) {
        // Nested
        Document doc = new Document();
        doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
        docs.add(doc);
    }
    // Root
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "1", UidFieldMapper.Defaults.FIELD_TYPE));
    NumericDocValuesField version = new NumericDocValuesField(VersionFieldMapper.NAME, 5L);
    doc.add(version);
    docs.add(doc);

    writer.updateDocuments(new Term(UidFieldMapper.NAME, "1"), docs);
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(5L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(5L));

    version.setLongValue(6L);
    writer.updateDocuments(new Term(UidFieldMapper.NAME, "1"), docs);
    version.setLongValue(7L);
    writer.updateDocuments(new Term(UidFieldMapper.NAME, "1"), docs);
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), equalTo(7L));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")).version,
            equalTo(7L));

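    // The nested documents carry the same uid as the root, so one delete-by-term removes the whole block.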
    writer.deleteDocuments(new Term(UidFieldMapper.NAME, "1"));
    directoryReader = reopen(directoryReader);
    assertThat(Versions.loadVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")),
            equalTo(Versions.NOT_FOUND));
    assertThat(Versions.loadDocIdAndVersion(directoryReader, new Term(UidFieldMapper.NAME, "1")), nullValue());
    directoryReader.close();
    writer.close();
    dir.close();
}

From source file:org.elasticsearch.index.cache.filter.FilterCacheTests.java

License:Apache License

private void verifyCache(FilterCache filterCache) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir,
            new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter, true);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        document.add(new TextField("id", Integer.toString(i), Field.Store.YES));
        indexWriter.addDocument(document);
    }

    reader = refreshReader(reader);
    IndexSearcher searcher = new IndexSearcher(reader);
    assertThat(
            Lucene.count(searcher,
                    new ConstantScoreQuery(filterCache.cache(new TermFilter(new Term("id", "1"))))),
            equalTo(1L));
    assertThat(Lucene.count(searcher, new XFilteredQuery(new MatchAllDocsQuery(),
            filterCache.cache(new TermFilter(new Term("id", "1"))))), equalTo(1L));

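    // The deleted document is gone from fresh readers, but a filter cached by reader cache key may still match it (see below).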
    indexWriter.deleteDocuments(new Term("id", "1"));
    reader = refreshReader(reader);
    searcher = new IndexSearcher(reader);
    TermFilter filter = new TermFilter(new Term("id", "1"));
    Filter cachedFilter = filterCache.cache(filter);
    long constantScoreCount = filter == cachedFilter ? 0 : 1;
    // sadly, when caching based on cacheKey with NRT, this fails, that's why we have DeletionAware one
    assertThat(Lucene.count(searcher, new ConstantScoreQuery(cachedFilter)), equalTo(constantScoreCount));
    assertThat(Lucene.count(searcher, new XConstantScoreQuery(cachedFilter)), equalTo(0L));
    assertThat(Lucene.count(searcher, new XFilteredQuery(new MatchAllDocsQuery(), cachedFilter)), equalTo(0L));

    indexWriter.close();
}

From source file:org.elasticsearch.index.cache.id.SimpleIdCacheTests.java

License:Apache License

@Test
public void testDeletedDocuments() throws Exception {
    SimpleIdCache idCache = createSimpleIdCache(Tuple.tuple("child", "parent"));
    IndexWriter writer = createIndexWriter();
    // Begins with parent, ends with child docs
    final Document parent = doc("parent", "1");
    writer.addDocument(parent);
    writer.addDocument(childDoc("child", "1", "parent", "1"));
    writer.addDocument(childDoc("child", "2", "parent", "1"));
    writer.addDocument(childDoc("child", "3", "parent", "1"));
    writer.commit();

    final String parentUid = parent.get("_uid");
    assert parentUid != null;
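    // Delete the parent by its _uid; it remains in the segment, so the id cache below still sees it.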
    writer.deleteDocuments(new Term("_uid", parentUid));

    writer.close();
    DirectoryReader topLevelReader = DirectoryReader.open(writer.getDirectory());
    List<AtomicReaderContext> leaves = topLevelReader.getContext().leaves();
    idCache.refresh(leaves);

    assertThat(leaves.size(), equalTo(1));
    IdReaderCache readerCache = idCache.reader(leaves.get(0).reader());
    IdReaderTypeCache typeCache = readerCache.type("parent");
    assertThat(typeCache.idByDoc(0).toUtf8(), equalTo("1"));
}

From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java

License:Apache License

private void innerDelete(Delete delete, IndexWriter writer) throws IOException {
    synchronized (dirtyLock(delete.uid())) {
        final long currentVersion;
        VersionValue versionValue = versionMap.getUnderLock(delete.uid().bytes());
        if (versionValue == null) {
            currentVersion = loadCurrentVersionFromIndex(delete.uid());
        } else {
            if (enableGcDeletes && versionValue.delete()
                    && (threadPool.estimatedTimeInMillis() - versionValue.time()) > gcDeletesInMillis) {
                currentVersion = Versions.NOT_FOUND; // deleted, and GC
            } else {
                currentVersion = versionValue.version();
            }
        }

        long updatedVersion;
        long expectedVersion = delete.version();
        if (delete.versionType().isVersionConflictForWrites(currentVersion, expectedVersion)) {
            if (delete.origin() == Operation.Origin.RECOVERY) {
                return;
            } else {
                throw new VersionConflictEngineException(shardId, delete.type(), delete.id(), currentVersion,
                        expectedVersion);
            }
        }
        updatedVersion = delete.versionType().updateVersion(currentVersion, expectedVersion);
        final boolean found;
        if (currentVersion == Versions.NOT_FOUND) {
            // doc does not exist and no prior deletes
            found = false;
        } else if (versionValue != null && versionValue.delete()) {
            // a "delete on delete", in this case, we still increment the version, log it, and return that version
            found = false;
        } else {
            // we deleted a currently existing document
            writer.deleteDocuments(delete.uid());
            found = true;
        }

        delete.updateVersion(updatedVersion, found);
        Translog.Location translogLocation = translog.add(new Translog.Delete(delete));
        versionMap.putUnderLock(delete.uid().bytes(),
                new DeleteVersionValue(updatedVersion, threadPool.estimatedTimeInMillis(), translogLocation));

        indexingService.postDeleteUnderLock(delete);
    }
}