Example usage for org.apache.lucene.index IndexWriter commit

Introduction

This page shows examples of using org.apache.lucene.index.IndexWriter.commit().

Prototype

@Override
public final long commit() throws IOException 

Document

Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
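
Before the real-world examples, here is a minimal sketch of the add-then-commit pattern. It is not taken from the examples below; the index path, field name, and document content are illustrative, and it assumes a Lucene version whose commit() returns a sequence number, matching the prototype above.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitExample {
    public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(Paths.get("example-index")); // illustrative path
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

        Document doc = new Document();
        doc.add(new TextField("body", "hello commit", Field.Store.YES));
        writer.addDocument(doc);

        // syncs all pending changes to stable storage; a reader opened after this
        // call sees the new document even if the process crashes afterwards
        long seqNo = writer.commit();
        System.out.println("committed, sequence number = " + seqNo);

        writer.close();
        dir.close();
    }
}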

Usage

From source file: org.elasticsearch.index.store.StoreTests.java

License: Apache License

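// Indexes random docs with intermittent commits (NoDeletionPolicy keeps every commit point),
// then verifies that cleanupAndVerify removes files not referenced by the chosen metadata
// snapshot while the checksum file survives.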
@Test
public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    // this time, use a random codec
    IndexWriterConfig indexWriterConfig = newIndexWriterConfig(random(), new MockAnalyzer(random()))
            .setCodec(TestUtil.getDefaultCodec());
    // keep all commits so we can clean up based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
        if (i > 0 && randomIntBetween(0, 10) == 0) {
            writer.commit();
            numCommits++;
        }
        Document doc = new Document();
        doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);

    }
    if (numCommits < 1) {
        writer.commit();
        Document doc = new Document();
        doc.add(new TextField("id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);
    }

    Store.MetadataSnapshot firstMeta = store.getMetadata();

    if (random().nextBoolean()) {
        for (int i = 0; i < docs; i++) {
            if (random().nextBoolean()) {
                Document doc = new Document();
                doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                writer.updateDocument(new Term("id", "" + i), doc);
            }
        }
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot secondMeta = store.getMetadata();

    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
        if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)
                || file.startsWith("extra")) {
            continue;
        }
        BytesRef hash = new BytesRef();
        if (file.startsWith("segments")) {
            hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
        }
        StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file),
                file + "checksum", null, hash);
        legacyMeta.put(file, storeFileMetaData);
        checksums.add(storeFileMetaData);
    }
    checksums.write(store); // write one checksum file here - we expect it to survive all the cleanups

    if (randomBoolean()) {
        store.cleanupAndVerify("test", firstMeta);
        String[] strings = store.directory().listAll();
        int numChecksums = 0;
        int numNotFound = 0;
        for (String file : strings) {
            if (file.startsWith("extra")) {
                continue;
            }
            assertTrue(firstMeta.contains(file) || Store.isChecksum(file) || file.equals("write.lock"));
            if (Store.isChecksum(file)) {
                numChecksums++;
            } else if (secondMeta.contains(file) == false) {
                numNotFound++;
            }

        }
        assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
        assertEquals("we wrote one checksum but it's gone now? - checksums are supposed to be kept",
                numChecksums, 1);
    } else {
        store.cleanupAndVerify("test", secondMeta);
        String[] strings = store.directory().listAll();
        int numChecksums = 0;
        int numNotFound = 0;
        for (String file : strings) {
            if (file.startsWith("extra")) {
                continue;
            }
            assertTrue(file, secondMeta.contains(file) || Store.isChecksum(file) || file.equals("write.lock"));
            if (Store.isChecksum(file)) {
                numChecksums++;
            } else if (firstMeta.contains(file) == false) {
                numNotFound++;
            }

        }
        assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
        assertEquals("we wrote one checksum but it's gone now? - checksums are supposed to be kept",
                numChecksums, 1);
    }

    deleteContent(store.directory());
    IOUtils.close(store);
}

From source file: org.elasticsearch.index.store.StoreTests.java

License: Apache License

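// Attaches custom commit user data (sync id, translog generation) before commit() and
// verifies it can be read back from the store's metadata snapshot.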
@Test
public void testUserDataRead() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    IndexWriterConfig config = newIndexWriterConfig(random(), new MockAnalyzer(random()))
            .setCodec(TestUtil.getDefaultCodec());
    SnapshotDeletionPolicy deletionPolicy = new SnapshotDeletionPolicy(
            new KeepOnlyLastDeletionPolicy(shardId, EMPTY_SETTINGS));
    config.setIndexDeletionPolicy(deletionPolicy);
    IndexWriter writer = new IndexWriter(store.directory(), config);
    Document doc = new Document();
    doc.add(new TextField("id", "1", Field.Store.NO));
    writer.addDocument(doc);
    Map<String, String> commitData = new HashMap<>(2);
    String syncId = "a sync id";
    String translogId = "a translog id";
    commitData.put(Engine.SYNC_COMMIT_ID, syncId);
    commitData.put(Translog.TRANSLOG_GENERATION_KEY, translogId);
    writer.setCommitData(commitData);
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata;
    if (randomBoolean()) {
        metadata = store.getMetadata();
    } else {
        metadata = store.getMetadata(deletionPolicy.snapshot());
    }
    assertFalse(metadata.asMap().isEmpty());
    // do not check for correct files, we have enough tests for that above
    assertThat(metadata.getCommitUserData().get(Engine.SYNC_COMMIT_ID), equalTo(syncId));
    assertThat(metadata.getCommitUserData().get(Translog.TRANSLOG_GENERATION_KEY), equalTo(translogId));
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
}

From source file: org.elasticsearch.index.store.StoreTests.java

License: Apache License

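// Commits a small index, truncates the segments_N file, and checks that reading the
// store afterwards marks it as corrupted.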
public void testMarkCorruptedOnTruncatedSegmentsFile() throws IOException {
    IndexWriterConfig iwc = newIndexWriterConfig();
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);

    int numDocs = 1 + random().nextInt(10);
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        docs.add(doc);
    }
    for (Document d : docs) {
        writer.addDocument(d);
    }
    writer.commit();
    writer.close();
    MockDirectoryWrapper leaf = DirectoryUtils.getLeaf(store.directory(), MockDirectoryWrapper.class);
    if (leaf != null) {
        leaf.setPreventDoubleWrite(false); // deliberately allow overwriting segments_N below
    }
    SegmentInfos segmentCommitInfos = store.readLastCommittedSegmentsInfo();
    try (IndexOutput out = store.directory().createOutput(segmentCommitInfos.getSegmentsFileName(),
            IOContext.DEFAULT)) {
        // leave the file empty to simulate a truncated segments_N
    }

    try {
        if (randomBoolean()) {
            store.getMetadata();
        } else {
            store.readLastCommittedSegmentsInfo();
        }
        fail("corrupted segments_N file");
    } catch (CorruptIndexException ex) {
        // expected
    }
    assertTrue(store.isMarkedCorrupted());
    Lucene.cleanLuceneIndex(store.directory()); // remove the corrupted index so MockDirectoryWrapper's checkIndex-on-close does not fail
    store.close();
}

From source file: org.elasticsearch.index.store.StoreTests.java

License: Apache License

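// Shows that Store.canOpenIndex only returns true once a commit exists, and returns
// false again after the store has been marked corrupted.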
public void testCanOpenIndex() throws IOException {
    IndexWriterConfig iwc = newIndexWriterConfig();
    Path tempDir = createTempDir();
    final BaseDirectoryWrapper dir = newFSDirectory(tempDir);
    assertFalse(Store.canOpenIndex(logger, tempDir));
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new StringField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    assertTrue(Store.canOpenIndex(logger, tempDir));

    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new DirectoryService(shardId, Settings.EMPTY) {
        @Override
        public long throttleTimeInNanos() {
            return 0;
        }

        @Override
        public Directory newDirectory() throws IOException {
            return dir;
        }
    };
    Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
    store.markStoreCorrupted(new CorruptIndexException("foo", "bar"));
    assertFalse(Store.canOpenIndex(logger, tempDir));
    store.close();
}

From source file: org.elasticsearch.index.utils.RedisFilterTests.java

License: Apache License

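// Indexes 100 docs, committing periodically, then runs a RedisFilter over a composite
// reader opened from the writer.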
public void testTermFilter() throws Exception {
    String fieldName = "field1";
    Directory rd = new RAMDirectory();
    IndexWriter w = new IndexWriter(rd, new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer()));
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        int term = i * 10; // terms are multiples of 10
        doc.add(new Field(fieldName, "" + term, StringField.TYPE_NOT_STORED));
        doc.add(new Field("all", "xxx", StringField.TYPE_NOT_STORED));
        doc.add(new Field("_uid", "id" + i, StringField.TYPE_NOT_STORED));
        w.addDocument(doc);
        if ((i % 40) == 0) {
            w.commit();
        }
    }
    AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(w, true));
    w.close();

    //        TermFilter tf = new TermFilter(new Term(fieldName, "19"));
    //        FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
    //        assertThat(bits, nullValue());
    //
    //        tf = new TermFilter(new Term(fieldName, "20"));
    //        bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
    //        assertThat(bits.cardinality(), equalTo(1));
    //
    //        tf = new TermFilter(new Term("all", "xxx"));
    //        bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
    //        assertThat(bits.cardinality(), equalTo(100));

    // test the redis filter

    RedisFilter tf = new RedisFilter(new Term("all", "xxx"));
    FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
    System.out.println(bits.cardinality());
    //        assertThat(bits.cardinality(), equalTo(1));

    reader.close();
    rd.close();
}

From source file: org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTest.java

License: Apache License

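// Builds a completion-suggester Lookup: documents are added with random intermediate
// commits, then commit/forceMerge/commit collapses the index to a single segment.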
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces,
        long[] weights) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) {
        public PostingsFormat postingsFormat() {
            return mapper.postingsFormatProvider().get();
        }
    };
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mapper.indexAnalyzer());

    indexWriterConfig.setCodec(filterCodec);
    IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < weights.length; i++) {
        Document doc = new Document();
        BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i],
                new BytesRef(Long.toString(weights[i])));
        doc.add(mapper.getCompletionField(terms[i], payload));
        if (randomBoolean()) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, true);
    assertThat(reader.leaves().size(), equalTo(1));
    assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
    AtomicReaderContext atomicReaderContext = reader.leaves().get(0);
    Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
    Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper,
            new CompletionSuggestionContext(null));
    reader.close();
    writer.close();
    dir.close();
    return lookup;
}

From source file: org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTests.java

License: Apache License

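// Same lookup-building pattern against the newer codec/field-type APIs, again with
// random intermediate commits and a final commit/forceMerge/commit.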
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces,
        long[] weights) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Codec codec = new Lucene54Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            final PostingsFormat in = super.getPostingsFormatForField(field);
            return mapper.fieldType().postingsFormat(in);
        }
    };
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.fieldType().indexAnalyzer());

    indexWriterConfig.setCodec(codec);
    IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < weights.length; i++) {
        Document doc = new Document();
        BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i],
                new BytesRef(Long.toString(weights[i])));
        doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload));
        if (randomBoolean()) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1, true);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, true);
    assertThat(reader.leaves().size(), equalTo(1));
    assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
    LeafReaderContext atomicReaderContext = reader.leaves().get(0);
    Terms luceneTerms = atomicReaderContext.reader().terms(mapper.fieldType().names().fullName());
    Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper.fieldType(),
            new CompletionSuggestionContext(null));
    reader.close();
    writer.close();
    dir.close();
    return lookup;
}

From source file: org.elasticsearch.search.suggest.completion.old.CompletionPostingsFormatTest.java

License: Apache License

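// Variant of the lookup-building pattern for the old completion field mapper; the
// commit/forceMerge/commit sequence again yields a single segment.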
public Lookup buildAnalyzingLookup(final OldCompletionFieldMapper mapper, String[] terms, String[] surfaces,
        long[] weights) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Codec codec = new Lucene50Codec() {
        public PostingsFormat getPostingsFormatForField(String field) {
            final PostingsFormat in = super.getPostingsFormatForField(field);
            return mapper.fieldType().postingsFormat(in);
        }
    };
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(mapper.fieldType().indexAnalyzer());

    indexWriterConfig.setCodec(codec);
    IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < weights.length; i++) {
        Document doc = new Document();
        BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i],
                new BytesRef(Long.toString(weights[i])));
        doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload));
        if (randomBoolean()) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1, true);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, true);
    assertThat(reader.leaves().size(), equalTo(1));
    assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
    LeafReaderContext atomicReaderContext = reader.leaves().get(0);
    Terms luceneTerms = atomicReaderContext.reader().terms(mapper.fieldType().names().fullName());
    Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper.fieldType(),
            new CompletionSuggestionContext(null));
    reader.close();
    writer.close();
    dir.close();
    return lookup;
}

From source file: org.elasticsearch.termvectors.AbstractTermVectorTests.java

License: Apache License

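// Writes each test doc with configurable term-vector options, calling commit() after
// every updateDocument, then opens a reader over the directory.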
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                    TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }

            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
        Document d = new Document();
        d.add(new Field("id", doc.id, StringField.TYPE_STORED));
        for (int i = 0; i < doc.fieldContent.length; i++) {
            FieldType type = new FieldType(TextField.TYPE_STORED);
            TestFieldSetting fieldSetting = doc.fieldSettings[i];

            type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
            type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
            type.setStoreTermVectorPositions(
                    fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
            type.setStoreTermVectors(true);
            type.freeze();
            d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
        }
        writer.updateDocument(new Term("id", doc.id), d);
        writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
}

From source file: org.elasticsearch.test.integration.search.suggest.CompletionPostingsFormatTest.java

License: Apache License

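// Older integration-test variant of the same lookup-building pattern, with random
// intermediate commits and a final commit/forceMerge/commit.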
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces,
        long[] weights) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) {
        public PostingsFormat postingsFormat() {
            return mapper.postingsFormatProvider().get();
        }
    };
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mapper.indexAnalyzer());

    indexWriterConfig.setCodec(filterCodec);
    IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < weights.length; i++) {
        Document doc = new Document();
        BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i],
                new BytesRef(Long.toString(weights[i])));
        doc.add(mapper.getCompletionField(terms[i], payload));
        if (randomBoolean()) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, true);
    assertThat(reader.leaves().size(), equalTo(1));
    assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
    AtomicReaderContext atomicReaderContext = reader.leaves().get(0);
    Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
    Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper, false);
    reader.close();
    writer.close();
    dir.close();
    return lookup;
}