List of usage examples for org.apache.lucene.index SegmentInfos getSegmentsFileName
public String getSegmentsFileName()
From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java
License:Apache License
/** * Returns an iterable that allows to iterate over all files in this segments info *///from w w w. j a va2 s . c om public static Iterable<String> files(SegmentInfos infos) throws IOException { final List<Collection<String>> list = new ArrayList<>(); list.add(Collections.singleton(infos.getSegmentsFileName())); for (SegmentCommitInfo info : infos) { list.add(info.files()); } return Iterables.flatten(list); }
From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java
License:Apache License
/** * This method removes all files from the given directory that are not referenced by the given segments file. * This method will open an IndexWriter and relies on index file deleter to remove all unreferenced files. Segment files * that are newer than the given segments file are removed forcefully to prevent problems with IndexWriter opening a potentially * broken commit point / leftover./*from w w w . j a v a2 s . com*/ * <b>Note:</b> this method will fail if there is another IndexWriter open on the given directory. This method will also acquire * a write lock from the directory while pruning unused files. This method expects an existing index in the given directory that has * the given segments file. */ public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Directory directory) throws IOException { final SegmentInfos si = readSegmentInfos(segmentsFileName, directory); try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) { int foundSegmentFiles = 0; for (final String file : directory.listAll()) { /** * we could also use a deletion policy here but in the case of snapshot and restore * sometimes we restore an index and override files that were referenced by a "future" * commit. If such a commit is opened by the IW it would likely throw a corrupted index exception * since checksums don's match anymore. that's why we prune the name here directly. * We also want the caller to know if we were not able to remove a segments_N file. */ if (file.startsWith(IndexFileNames.SEGMENTS) || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) { foundSegmentFiles++; if (file.equals(si.getSegmentsFileName()) == false) { directory.deleteFile(file); // remove all segment_N files except of the one we wanna keep } } } assert SegmentInfos.getLastCommitSegmentsFileName(directory).equals(segmentsFileName); if (foundSegmentFiles == 0) { throw new IllegalStateException("no commit found in the directory"); } } final CommitPoint cp = new CommitPoint(si, directory); try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER).setIndexCommit(cp).setCommitOnClose(false) .setMergePolicy(NoMergePolicy.INSTANCE).setOpenMode(IndexWriterConfig.OpenMode.APPEND))) { // do nothing and close this will kick of IndexFileDeleter which will remove all pending files } return si; }
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testPruneUnreferencedFiles() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); dir.setEnableVirusScanner(false);//ww w . j a v a 2s . c o m IndexWriterConfig iwc = newIndexWriterConfig(); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir); doc = new Document(); doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.deleteDocuments(new Term("id", "2")); writer.commit(); DirectoryReader open = DirectoryReader.open(writer, true); assertEquals(3, open.numDocs()); assertEquals(1, open.numDeletedDocs()); assertEquals(4, open.maxDoc()); open.close(); writer.close(); SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir); assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName()); open = DirectoryReader.open(dir); assertEquals(3, open.numDocs()); assertEquals(0, open.numDeletedDocs()); assertEquals(3, open.maxDoc()); IndexSearcher s = new IndexSearcher(open); assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0); for (String file : dir.listAll()) { assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2")); } open.close(); dir.close(); }
From source file:org.elasticsearch.index.store.StoreTest.java
License:Apache License
public void testMarkCorruptedOnTruncatedSegmentsFile() throws IOException { IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setCodec(actualDefaultCodec()); final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random(), false); Store store = new Store(shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId)); IndexWriter writer = new IndexWriter(store.directory(), iwc); int numDocs = 1 + random().nextInt(10); List<Document> docs = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); docs.add(doc);/* w w w . j a v a 2 s.c o m*/ } for (Document d : docs) { writer.addDocument(d); } writer.commit(); writer.close(); SegmentInfos segmentCommitInfos = store.readLastCommittedSegmentsInfo(); try (IndexOutput out = store.directory().createOutput(segmentCommitInfos.getSegmentsFileName(), IOContext.DEFAULT)) { // empty file } try { if (randomBoolean()) { store.getMetadata(); } else { store.readLastCommittedSegmentsInfo(); } fail("corrupted segments_N file"); } catch (CorruptIndexException ex) { // expected } assertTrue(store.isMarkedCorrupted()); Lucene.cleanLuceneIndex(store.directory()); // we have to remove the index since it's corrupted and might fail the MocKDirWrapper checkindex call store.close(); }
From source file:org.elasticsearch.index.store.StoreTests.java
License:Apache License
public void testMarkCorruptedOnTruncatedSegmentsFile() throws IOException { IndexWriterConfig iwc = newIndexWriterConfig(); final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random()); Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId)); IndexWriter writer = new IndexWriter(store.directory(), iwc); int numDocs = 1 + random().nextInt(10); List<Document> docs = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); docs.add(doc);//from www. ja v a 2s .co m } for (Document d : docs) { writer.addDocument(d); } writer.commit(); writer.close(); MockDirectoryWrapper leaf = DirectoryUtils.getLeaf(store.directory(), MockDirectoryWrapper.class); if (leaf != null) { leaf.setPreventDoubleWrite(false); // I do this on purpose } SegmentInfos segmentCommitInfos = store.readLastCommittedSegmentsInfo(); try (IndexOutput out = store.directory().createOutput(segmentCommitInfos.getSegmentsFileName(), IOContext.DEFAULT)) { // empty file } try { if (randomBoolean()) { store.getMetadata(); } else { store.readLastCommittedSegmentsInfo(); } fail("corrupted segments_N file"); } catch (CorruptIndexException ex) { // expected } assertTrue(store.isMarkedCorrupted()); Lucene.cleanLuceneIndex(store.directory()); // we have to remove the index since it's corrupted and might fail the MocKDirWrapper checkindex call store.close(); }