Usage examples for org.apache.lucene.index.IndexWriterConfig.setMergePolicy
@Override
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy)
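As the signature shows, setMergePolicy returns the IndexWriterConfig itself, so it chains with the other setters. Before the project-specific examples below, a minimal self-contained sketch of the call (not taken from any of the sources listed here; the index path and the segment-size cap are illustrative values, and TieredMergePolicy is simply Lucene's default policy):

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetMergePolicyExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // illustrative path
        TieredMergePolicy mergePolicy = new TieredMergePolicy();
        mergePolicy.setMaxMergedSegmentMB(1024.0); // illustrative cap on merged segment size
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
                .setMergePolicy(mergePolicy) // chainable: returns the IndexWriterConfig
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));
            writer.addDocument(doc);
        }
    }
}

The examples that follow exercise the same call with log-based policies, an upgrade wrapper, and NoMergePolicy; in every case the policy is configured before the IndexWriter is constructed.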
From source file: org.apache.jackrabbit.core.query.lucene.IndexMigration.java
License: Apache License

/**
 * Checks if the given <code>index</code> needs to be migrated.
 *
 * @param index the index to check and migrate if needed.
 * @param directoryManager the directory manager.
 * @param oldSeparatorChar the old separator char that needs to be replaced.
 * @throws IOException if an error occurs while migrating the index.
 */
public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar)
        throws IOException {
    Directory indexDir = index.getDirectory();
    log.debug("Checking {} ...", indexDir);
    ReadOnlyIndexReader reader = index.getReadOnlyIndexReader();
    try {
        if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) {
            // index was created with Jackrabbit 1.5 or higher
            // no need for migration
            log.debug("IndexFormatVersion >= V3, no migration needed");
            return;
        }
        // assert: there is at least one node in the index, otherwise the
        // index format version would be at least V3
        TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, ""));
        try {
            Term t = terms.term();
            if (t.text().indexOf(oldSeparatorChar) == -1) {
                log.debug("Index already migrated");
                return;
            }
        } finally {
            terms.close();
        }
    } finally {
        reader.release();
        index.releaseWriterAndReaders();
    }

    // if we get here then the index must be migrated
    log.debug("Index requires migration {}", indexDir);

    String migrationName = index.getName() + "_v36";
    if (directoryManager.hasDirectory(migrationName)) {
        directoryManager.delete(migrationName);
    }

    Directory migrationDir = directoryManager.getDirectory(migrationName);
    final IndexWriterConfig c = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer());
    c.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()));
    c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    try {
        IndexWriter writer = new IndexWriter(migrationDir, c);
        try {
            IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory()),
                    oldSeparatorChar);
            try {
                writer.addIndexes(r);
                writer.forceMerge(1);
                writer.close();
            } finally {
                r.close();
            }
        } finally {
            writer.close();
        }
    } finally {
        migrationDir.close();
    }
    directoryManager.delete(index.getName());
    if (!directoryManager.rename(migrationName, index.getName())) {
        throw new IOException("failed to move migrated directory " + migrationDir);
    }
    log.info("Migrated " + index.getName());
}
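Note the merge-policy choice in this example: UpgradeIndexMergePolicy wraps the LogByteSizeMergePolicy so that the subsequent forceMerge(1) rewrites all pre-existing segments in the current index format; this is the same mechanism Lucene's IndexUpgrader tool relies on.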
From source file: org.apache.nutch.indexwriter.lucene.LuceneWriter.java
License: Apache License

public void open(JobConf job, String name) throws IOException {
    this.fs = FileSystem.get(job);
    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt()));
    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_2,
            new SmartChineseAnalyzer());
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    mergePolicy.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE));
    indexWriterConfig.setMergePolicy(mergePolicy);
    indexWriterConfig.setUseCompoundFile(false);
    indexWriterConfig.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
    indexWriterConfig.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writer = new org.apache.lucene.index.IndexWriter(
            FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), indexWriterConfig);
    /*
     * addFieldOptions("title", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("url", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("content", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("lang", STORE.YES, INDEX.UNTOKENIZED, VECTOR.NO, job);
     */
    processOptions(job);
}
From source file: org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java
License: Apache License

private synchronized void removeProductDocument(Product product) throws CatalogException {
    try {
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        e.printStackTrace();
    }
    try {
        LOG.log(Level.FINE,
                "LuceneCatalog: remove document from index for product: [" + product.getProductId() + "]");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        IndexWriter writer = new IndexWriter(indexDir, config);
        writer.deleteDocuments(new Term("product_id", product.getProductId()));
        writer.close();
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Exception removing product: [" + product.getProductName()
                + "] from index: Message: " + e.getMessage());
        throw new CatalogException(e.getMessage(), e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
            }
        }
    }
}
From source file: org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java
License: Apache License

private synchronized void addCompleteProductToIndex(CompleteProduct cp) throws CatalogException {
    IndexWriter writer = null;
    try {
        /* writer = new IndexWriter(indexFilePath, new StandardAnalyzer(), createIndex); */
        // writer.setCommitLockTimeout(this.commitLockTimeout * 1000);
        // writer.setWriteLockTimeout(this.writeLockTimeout * 1000);
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        writer = new IndexWriter(indexDir, config);
        Document doc = toDoc(cp.getProduct(), cp.getMetadata());
        writer.addDocument(doc);
        // TODO: determine a better way to optimize the index
    } catch (Exception e) {
        LOG.log(Level.WARNING, "Unable to index product: [" + cp.getProduct().getProductName()
                + "]: Message: " + e.getMessage(), e);
        throw new CatalogException("Unable to index product: [" + cp.getProduct().getProductName()
                + "]: Message: " + e.getMessage(), e);
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            System.out.println("failed" + e.getLocalizedMessage());
        }
    }
}
From source file: org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java
License: Apache License

private synchronized void removeWorkflowInstanceDocument(WorkflowInstance inst)
        throws InstanceRepositoryException {
    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(indexDir);
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        IndexWriter writer = new IndexWriter(indexDir, config);
        LOG.log(Level.FINE, "LuceneWorkflowEngine: remove document from index for workflow instance: ["
                + inst.getId() + "]");
        writer.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
        writer.close();
    } catch (IOException e) {
        LOG.log(Level.SEVERE, e.getMessage());
        LOG.log(Level.WARNING, "Exception removing workflow instance: [" + inst.getId()
                + "] from index: Message: " + e.getMessage());
        throw new InstanceRepositoryException(e.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
            }
        }
    }
}
From source file: org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java
License: Apache License

private synchronized void addWorkflowInstanceToCatalog(WorkflowInstance wInst)
        throws InstanceRepositoryException {
    IndexWriter writer = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        writer = new IndexWriter(indexDir, config);
        Document doc = toDoc(wInst);
        writer.addDocument(doc);
    } catch (IOException e) {
        LOG.log(Level.WARNING,
                "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage());
        throw new InstanceRepositoryException(
                "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
From source file: org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepositoryFactory.java
License: Apache License

public WorkflowInstanceRepository createInstanceRepository() {
    Directory indexDir = null;
    try {
        indexDir = FSDirectory.open(new File(indexFilePath).toPath());
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Create the index if it does not already exist
    IndexWriter writer = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        config.setMergePolicy(lmp);
        writer = new IndexWriter(indexDir, config);
    } catch (Exception e) {
        LOG.severe("Unable to create index: " + e.getMessage());
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                LOG.severe("Unable to close index: " + e.getMessage());
            }
        }
    }
    return new LuceneWorkflowInstanceRepository(indexFilePath, pageSize);
}
From source file: org.apache.solr.core.snapshots.SolrSnapshotManager.java
License: Apache License

/**
 * This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
 *
 * @param core The Solr core
 * @param dir The index directory storing the snapshot.
 * @param delPolicy The deletion policy whose callbacks perform the cleanup.
 * @throws IOException in case of I/O errors.
 */
private static void deleteSnapshotIndexFiles(SolrCore core, Directory dir, IndexDeletionPolicy delPolicy)
        throws IOException {
    IndexWriterConfig conf = core.getSolrConfig().indexConfig.toIndexWriterConfig(core);
    conf.setOpenMode(OpenMode.APPEND);
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // don't want to merge any commits here!
    conf.setIndexDeletionPolicy(delPolicy);
    conf.setCodec(core.getCodec());

    try (SolrIndexWriter iw = new SolrIndexWriter("SolrSnapshotCleaner", dir, conf)) {
        // Do nothing. The only purpose of opening the index writer is to invoke the Lucene
        // IndexDeletionPolicy#onInit method so that we can clean up the files associated with
        // the specified index commit. Note the index writer creates a new commit during the
        // close() operation (which is harmless).
    }
}
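Here the policy is chosen to suppress work rather than tune it: NoMergePolicy.INSTANCE guarantees the short-lived writer never schedules a merge, so opening and closing it has no side effect beyond firing the IndexDeletionPolicy callbacks that prune the unwanted commit's files.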
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);

    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();

    LeafReader ar = getOnlyLeafReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next()
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil()
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world")));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // lookupTerm(BytesRef)
    assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
    assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
    assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
    assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
    assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
    assertEquals(2, dv.lookupTerm(new BytesRef("world")));
    assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));

    ireader.close();
    directory.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testActuallySingleValued() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
    assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
    assertEquals(2, v.getValueCount());

    assertEquals(0, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    assertEquals(1, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    assertEquals(3, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals("bar", value.utf8ToString());
    value = v.lookupOrd(1);
    assertEquals("baz", value.utf8ToString());

    ir.close();
    dir.close();
}