List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
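OpenMode controls how the writer treats an index that may already exist in the target directory: CREATE discards any existing index, APPEND requires one to be present, and CREATE_OR_APPEND (the default) creates the index only if none exists. Before the collected examples below, here is a minimal sketch of the call, assuming Lucene 5.x-style constructors (no Version argument); the class name and index path are hypothetical placeholders:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenModeSketch {
    public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // hypothetical path
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        // CREATE wipes an existing index, APPEND requires one,
        // CREATE_OR_APPEND (the default) creates it only if missing.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}

The examples that follow show the same call embedded in real projects; note that several of them use the older Lucene 4.x constructors that take a Version argument.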
From source file:org.elasticsearch.index.store.StoreTests.java
License:Apache License
@Test
public void testRecoveryDiff() throws IOException, InterruptedException {
    int numDocs = 2 + random().nextInt(100);
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        docs.add(doc);
    }
    long seed = random().nextLong();
    Store.MetadataSnapshot first;
    {
        Random random = new Random(seed);
        IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random))
                .setCodec(TestUtil.getDefaultCodec());
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        iwc.setUseCompoundFile(random.nextBoolean());
        final ShardId shardId = new ShardId(new Index("index"), 1);
        DirectoryService directoryService = new LuceneManagedDirectoryService(random);
        Store store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
        IndexWriter writer = new IndexWriter(store.directory(), iwc);
        final boolean lotsOfSegments = rarely(random);
        for (Document d : docs) {
            writer.addDocument(d);
            if (lotsOfSegments && random.nextBoolean()) {
                writer.commit();
            } else if (rarely(random)) {
                writer.commit();
            }
        }
        writer.commit();
        writer.close();
        first = store.getMetadata();
        assertDeleteContent(store, directoryService);
        store.close();
    }
    long time = new Date().getTime();
    while (time == new Date().getTime()) {
        Thread.sleep(10); // bump the time
    }
    Store.MetadataSnapshot second;
    Store store;
    {
        Random random = new Random(seed);
        IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random))
                .setCodec(TestUtil.getDefaultCodec());
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        iwc.setUseCompoundFile(random.nextBoolean());
        final ShardId shardId = new ShardId(new Index("index"), 1);
        DirectoryService directoryService = new LuceneManagedDirectoryService(random);
        store = new Store(shardId, Settings.EMPTY, directoryService, new DummyShardLock(shardId));
        IndexWriter writer = new IndexWriter(store.directory(), iwc);
        final boolean lotsOfSegments = rarely(random);
        for (Document d : docs) {
            writer.addDocument(d);
            if (lotsOfSegments && random.nextBoolean()) {
                writer.commit();
            } else if (rarely(random)) {
                writer.commit();
            }
        }
        writer.commit();
        writer.close();
        second = store.getMetadata();
    }
    Store.RecoveryDiff diff = first.recoveryDiff(second);
    assertThat(first.size(), equalTo(second.size()));
    for (StoreFileMetaData md : first) {
        assertThat(second.get(md.name()), notNullValue());
        // si files are different - containing timestamps etc
        assertThat(second.get(md.name()).isSame(md), equalTo(false));
    }
    assertThat(diff.different.size(), equalTo(first.size()));
    assertThat(diff.identical.size(), equalTo(0)); // in lucene 5 nothing is identical - we use random ids in file headers
    assertThat(diff.missing, empty());
    // check the self diff
    Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
    assertThat(selfDiff.identical.size(), equalTo(first.size()));
    assertThat(selfDiff.different, empty());
    assertThat(selfDiff.missing, empty());
    // lets add some deletes
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random))
            .setCodec(TestUtil.getDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata();
    StoreFileMetaData delFile = null;
    for (StoreFileMetaData md : metadata) {
        if (md.name().endsWith(".liv")) {
            delFile = md;
            break;
        }
    }
    Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
    if (delFile != null) {
        assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file
        assertThat(afterDeleteDiff.different.size(), equalTo(0));
        assertThat(afterDeleteDiff.missing.size(), equalTo(2));
    } else {
        // an entire segment must be missing (single doc segment got dropped)
        assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
        assertThat(afterDeleteDiff.different.size(), equalTo(0));
        assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different
    }
    // check the self diff
    selfDiff = metadata.recoveryDiff(metadata);
    assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
    assertThat(selfDiff.different, empty());
    assertThat(selfDiff.missing, empty());
    // add a new commit
    iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writer = new IndexWriter(store.directory(), iwc);
    writer.addDocument(docs.get(0));
    writer.close();
    Store.MetadataSnapshot newCommitMetaData = store.getMetadata();
    Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
    if (delFile != null) {
        assertThat(newCommitDiff.identical.size(), equalTo(newCommitMetaData.size() - 5)); // segments_N, del file, cfs, cfe, si for the new segment
        assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different
        assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv"));
        assertThat(newCommitDiff.missing.size(), equalTo(4)); // segments_N, cfs, cfe, si for the new segment
    } else {
        assertThat(newCommitDiff.identical.size(), equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment
        assertThat(newCommitDiff.different.size(), equalTo(0));
        assertThat(newCommitDiff.missing.size(), equalTo(4)); // an entire segment must be missing (single doc segment got dropped) plus the commit is different
    }
    deleteContent(store.directory());
    IOUtils.close(store);
}
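One behavior the APPEND calls above rely on: with OpenMode.APPEND the writer refuses to open against a directory that holds no index yet. A minimal sketch of that failure mode, again assuming Lucene 5.x-style constructors; RAMDirectory and the variable names are illustrative:

// assumed imports: org.apache.lucene.analysis.standard.StandardAnalyzer,
// org.apache.lucene.index.IndexWriter, org.apache.lucene.index.IndexWriterConfig,
// org.apache.lucene.index.IndexNotFoundException,
// org.apache.lucene.store.Directory, org.apache.lucene.store.RAMDirectory
Directory emptyDir = new RAMDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
try (IndexWriter w = new IndexWriter(emptyDir, iwc)) {
    // never reached: APPEND requires an existing commit point
} catch (IndexNotFoundException expected) {
    // thrown when no segments file is found in the directory
}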
From source file:org.elasticsearch.termvectors.AbstractTermVectorTests.java
License:Apache License
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                    TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion), mapping);
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    for (TestDoc doc : testDocs) {
        Document d = new Document();
        d.add(new Field("id", doc.id, StringField.TYPE_STORED));
        for (int i = 0; i < doc.fieldContent.length; i++) {
            FieldType type = new FieldType(TextField.TYPE_STORED);
            TestFieldSetting fieldSetting = doc.fieldSettings[i];
            type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
            type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
            type.setStoreTermVectorPositions(
                    fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
            type.setStoreTermVectors(true);
            type.freeze();
            d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
        }
        writer.updateDocument(new Term("id", doc.id), d);
        writer.commit();
    }
    writer.close();
    return DirectoryReader.open(dir);
}
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java
License:Apache License
private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
    Directory dir = FSDirectory.open(new File("/tmp/foo"));
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT,
            new StandardAnalyzer(TEST_VERSION_CURRENT));
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
    d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields fields = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(fields, null, flags, fields);
}
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java
License:Apache License
private Fields buildWithLuceneAndReturnFields(String docId, String[] fields, String[] content,
        boolean[] withPositions, boolean[] withOffsets, boolean[] withPayloads) throws IOException {
    assert (fields.length == withPayloads.length);
    assert (content.length == withPayloads.length);
    assert (withPositions.length == withPayloads.length);
    assert (withOffsets.length == withPayloads.length);
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (int i = 0; i < withPayloads.length; i++) {
        if (withPayloads[i]) {
            mapping.put(fields[i], new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
                    TokenFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(TEST_VERSION_CURRENT),
            mapping);
    Directory dir = FSDirectory.open(new File("/tmp/foo"));
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, wrapper);
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    Document d = new Document();
    for (int i = 0; i < fields.length; i++) {
        d.add(new Field("id", docId, StringField.TYPE_STORED));
        FieldType type = new FieldType(TextField.TYPE_STORED);
        type.setStoreTermVectorOffsets(withOffsets[i]);
        type.setStoreTermVectorPayloads(withPayloads[i]);
        type.setStoreTermVectorPositions(withPositions[i] || withOffsets[i] || withPayloads[i]);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fields[i], content[i], type));
        writer.updateDocument(new Term("id", docId), d);
        writer.commit();
    }
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", docId)), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    assert (scoreDocs.length == 1);
    int doc = scoreDocs[0].doc;
    Fields returnFields = dr.getTermVectors(doc);
    return returnFields;
}
From source file:org.elasticsearch.test.unit.termvectors.TermVectorUnitTests.java
License:Apache License
private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT,
            new StandardAnalyzer(TEST_VERSION_CURRENT));
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields fields = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(fields, null, flags, fields);
    outResponse.setExists(true);
}
From source file:org.elasticsearch.test.unit.termvectors.TermVectorUnitTests.java
License:Apache License
private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
    Directory dir = FSDirectory.open(new File("/tmp/foo"));
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT,
            new StandardAnalyzer(TEST_VERSION_CURRENT));
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
    d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields termVectors = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(termVectors, null, flags, termVectors);
}
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.indexing.parallel.ParallelIndexer.java
License:Open Source License
public void run() {
    IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new StandardAnalyzer(LuceneUtils.LUCENE_VERSION));
    config.setOpenMode(openMode);
    config.setCodec(new LireCustomCodec());
    try {
        if (imageDirectory != null)
            System.out.println("Getting all images in " + imageDirectory + ".");
        writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        if (imageList == null) {
            files = FileUtils.getAllImages(new File(imageDirectory), true);
        } else {
            files = new LinkedList<String>();
            BufferedReader br = new BufferedReader(new FileReader(imageList));
            String line = null;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() > 3)
                    files.add(line.trim());
            }
        }
        numImages = files.size();
        System.out.println("Indexing " + files.size() + " images.");
        Thread p = new Thread(new Producer());
        p.start();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        long l = System.currentTimeMillis();
        for (int i = 0; i < numberOfThreads; i++) {
            Thread c = new Thread(new Consumer());
            c.start();
            threads.add(c);
        }
        Thread m = new Thread(new Monitoring());
        m.start();
        for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext();) {
            iterator.next().join();
        }
        long l1 = System.currentTimeMillis() - l;
        System.out.println("Analyzed " + overallCount + " images in " + l1 / 1000 + " seconds, ~"
                + ((overallCount > 0) ? (l1 / overallCount) : "n.a.") + " ms each.");
        writer.commit();
        writer.close();
        threadFinished = true;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
/**
 * Creates an IndexWriter for the given index directory, with the given analyzer.
 *
 * @param directory the index directory
 * @param create    set to true if you want to create a new index, overwriting any existing one
 * @param analyzer  the analyzer used for the IndexWriter
 * @return an IndexWriter
 * @throws IOException
 */
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer)
        throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION); // LetterTokenizer with LowerCaseFilter
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION); // WhitespaceTokenizer
    else if (analyzer == AnalyzerType.KeywordAnalyzer)
        tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token

    // the config
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise
    config.setCodec(new LireCustomCodec());
    return new IndexWriter(directory, config);
}
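A hypothetical call to the helper above, matching the Lucene 4.x File-based FSDirectory.open style used elsewhere on this page (the path and analyzer choice are illustrative):

Directory dir = FSDirectory.open(new File("/tmp/lire-index")); // illustrative path
// create == true maps to OpenMode.CREATE (overwrite); false maps to CREATE_OR_APPEND
IndexWriter writer = LuceneUtils.createIndexWriter(dir, true, AnalyzerType.WhitespaceAnalyzer);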
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer,
        double RAMBufferSize) throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION);

    // the config
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise
    config.setRAMBufferSizeMB(RAMBufferSize);
    config.setCodec(new LireCustomCodec());
    return new IndexWriter(directory, config);
}
From source file:org.explore3.searchengine.indexCreator.ImageIndex.java
License:Apache License
public static void main(String[] args) {
    String indexPath = "Imageindex";
    String DocsPath = "C:/Users/Pietro/Desktop/dataset/";
    boolean create = true;
    final File DocDir = new File(DocsPath);
    if (!DocDir.exists() || !DocDir.canRead()) {
        System.out.println("Document directory '" + DocDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }
    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        Directory indexDir = FSDirectory.open(new File(indexPath));
        // :Post-Release-Update-Version.LUCENE_XY:
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        IndexWriterConfig indexWriterConf = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        if (create) {
            // create a new index, removing any previous one
            indexWriterConf.setOpenMode(OpenMode.CREATE);
        } else {
            // add the document to an existing index
            indexWriterConf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        IndexWriter writer = new IndexWriter(indexDir, indexWriterConf);
        indexDocument(writer, DocDir);
        writer.close();
        IndexWriterConfig indexWriterConf2 = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        System.out.println("Creating spell index...");
        Spellindex.createSpellIndex("image", indexDir, indexDir, indexWriterConf2);
        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}