List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file: org.apache.nutch.indexwriter.lucene.LuceneWriter.java
License: Apache License
public void open(JobConf job, String name) throws IOException { this.fs = FileSystem.get(job); perm = new Path(FileOutputFormat.getOutputPath(job), name); temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt())); fs.delete(perm, true); // delete old, if any analyzerFactory = new AnalyzerFactory(job); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_2, new SmartChineseAnalyzer()); LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy(); mergePolicy.setMergeFactor(job.getInt("indexer.mergeFactor", 10)); mergePolicy.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE)); indexWriterConfig.setMergePolicy(mergePolicy); indexWriterConfig.setUseCompoundFile(false); indexWriterConfig.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128)); indexWriterConfig.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100)); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writer = new org.apache.lucene.index.IndexWriter( FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), indexWriterConfig); /*//from w ww. jav a 2 s . com * addFieldOptions("title", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job); * addFieldOptions("url", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job); * addFieldOptions("content", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job); * addFieldOptions("lang", STORE.YES, INDEX.UNTOKENIZED, VECTOR.NO, job); */ processOptions(job); }
From source file: org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java
License: Apache License
/**
 * Removes the index document for the given product, matched by its product id.
 *
 * Fix: the IndexWriter is now managed by try-with-resources, so it is closed
 * even when deleteDocuments throws; previously it was only closed on the
 * success path and would leak (and hold the index write lock) on failure.
 *
 * @param product the product whose document is deleted from the index
 * @throws CatalogException if the deletion fails
 */
private synchronized void removeProductDocument(Product product) throws CatalogException {
    try {
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        // Best-effort: a stale/missing reader does not block the delete below.
        e.printStackTrace();
    }
    try {
        LOG.log(Level.FINE,
                "LuceneCatalog: remove document from index for product: [" + product.getProductId() + "]");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        // try-with-resources guarantees the writer (and its write lock) is released.
        try (IndexWriter writer = new IndexWriter(indexDir, config)) {
            writer.deleteDocuments(new Term("product_id", product.getProductId()));
        }
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Exception removing product: [" + product.getProductName()
                + "] from index: Message: " + e.getMessage());
        throw new CatalogException(e.getMessage(), e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
                // reader close failures are non-fatal here
            }
        }
    }
}
From source file: org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java
License: Apache License
private synchronized void addCompleteProductToIndex(CompleteProduct cp) throws CatalogException { IndexWriter writer = null;/* w ww . ja va 2 s. co m*/ try { /*writer = new IndexWriter(indexFilePath, new StandardAnalyzer(), createIndex);*/ //writer.setCommitLockTimeout(this.commitLockTimeout * 1000); //writer.setWriteLockTimeout(this.writeLockTimeout * 1000); IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMergeFactor(mergeFactor); config.setMergePolicy(lmp); writer = new IndexWriter(indexDir, config); Document doc = toDoc(cp.getProduct(), cp.getMetadata()); writer.addDocument(doc); // TODO: determine a better way to optimize the index } catch (Exception e) { LOG.log(Level.WARNING, "Unable to index product: [" + cp.getProduct().getProductName() + "]: Message: " + e.getMessage(), e); throw new CatalogException("Unable to index product: [" + cp.getProduct().getProductName() + "]: Message: " + e.getMessage(), e); } finally { try { if (writer != null) { writer.close(); } } catch (Exception e) { System.out.println("failed" + e.getLocalizedMessage()); } } }
From source file: org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java
License: Apache License
private synchronized void removeWorkflowInstanceDocument(WorkflowInstance inst) throws InstanceRepositoryException { IndexReader reader = null;//from ww w. ja v a 2 s.c o m try { reader = DirectoryReader.open(indexDir); } catch (IOException e) { e.printStackTrace(); } try { reader = DirectoryReader.open(indexDir); IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMergeFactor(mergeFactor); config.setMergePolicy(lmp); IndexWriter writer = new IndexWriter(indexDir, config); LOG.log(Level.FINE, "LuceneWorkflowEngine: remove document from index for workflow instance: [" + inst.getId() + "]"); writer.deleteDocuments(new Term("workflow_inst_id", inst.getId())); writer.close(); } catch (IOException e) { LOG.log(Level.SEVERE, e.getMessage()); LOG.log(Level.WARNING, "Exception removing workflow instance: [" + inst.getId() + "] from index: Message: " + e.getMessage()); throw new InstanceRepositoryException(e.getMessage()); } finally { if (reader != null) { try { reader.close(); } catch (Exception ignore) { } } } }
From source file: org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java
License: Apache License
private synchronized void addWorkflowInstanceToCatalog(WorkflowInstance wInst) throws InstanceRepositoryException { IndexWriter writer = null;//from w ww . ja v a 2s . com try { IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMergeFactor(mergeFactor); config.setMergePolicy(lmp); writer = new IndexWriter(indexDir, config); Document doc = toDoc(wInst); writer.addDocument(doc); } catch (IOException e) { LOG.log(Level.WARNING, "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage()); throw new InstanceRepositoryException( "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage()); } finally { try { writer.close(); } catch (Exception e) { System.out.println(e); } } }
From source file: org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepositoryFactory.java
License: Apache License
public WorkflowInstanceRepository createInstanceRepository() { Directory indexDir = null;/*from ww w. ja v a 2 s.c o m*/ try { indexDir = FSDirectory.open(new File(indexFilePath).toPath()); } catch (IOException e) { e.printStackTrace(); } // Create the index if it does not already exist IndexWriter writer = null; try { IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); LogMergePolicy lmp = new LogDocMergePolicy(); config.setMergePolicy(lmp); writer = new IndexWriter(indexDir, config); } catch (Exception e) { LOG.severe("Unable to create index: " + e.getMessage()); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { LOG.severe("Unable to close index: " + e.getMessage()); } } } return new LuceneWorkflowInstanceRepository(indexFilePath, pageSize); }
From source file: org.apache.pdfbox.examples.lucene.IndexPDFFiles.java
License: Apache License
/** * Index all text files under a directory. * /*www . jav a 2s . c om*/ * @param args command line arguments * */ public static void main(String[] args) { String usage = "java org.apache.pdfbox.lucene.IndexPDFFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes all PDF documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file: org.apache.solr.codecs.test.testDeleteDocs.java
License: Apache License
public static void main(String[] args) { try {/*from w w w . jav a 2 s.co m*/ plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER)); //----------- index documents ------- StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer); // recreate the index on each execution config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); //config.setCodec(new SimpleTextCodec()); Properties props = new Properties(); FileInputStream fstream = new FileInputStream( "C:\\work\\search_engine\\codec\\solr410\\solr_codectest\\collection1\\conf\\kvstore.properties"); props.load(fstream); fstream.close(); ONSQLKVstoreHandler.getInstance().setKVStore("omega", props); ONSQLCodec codec = new ONSQLCodec(); config.setCodec(codec); config.setUseCompoundFile(false); Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER)); IndexWriter writer = new IndexWriter(luceneDir, config); QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_0, "title", analyzer); String search_word = "fourth"; Query query = queryParser.parse(search_word); writer.deleteDocuments(query); writer.commit(); writer.close(); searchIndex("title", search_word); } catch (Throwable te) { te.printStackTrace(); } }
From source file: org.apache.solr.codecs.test.testMergeSegments.java
License: Apache License
/**
 * Force-merges the ONSQL-backed test index down to a single segment.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        testUtil.initPropsONSQL();

        // Open the existing index with the custom ONSQL codec.
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        writerConfig.setCodec(new ONSQLCodec());
        writerConfig.setUseCompoundFile(false);

        Directory indexDirectory = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter indexWriter = new IndexWriter(indexDirectory, writerConfig);

        // Collapse all segments into one, then release the writer.
        indexWriter.forceMerge(1);
        indexWriter.close();
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLCodec.java
License: Apache License
public static void main(String[] args) { try {/*w ww .ja va 2s . com*/ plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER)); testUtil.initPropsONSQL(); //----------- index documents ------- StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer); // recreate the index on each execution config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //config.setCodec(new SimpleTextCodec()); ONSQLCodec codec = new ONSQLCodec(); config.setCodec(codec); config.setUseCompoundFile(false); Directory luceneDir = FSDirectory.open(plaintextDir); IndexWriter writer = new IndexWriter(luceneDir, config); writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES), new TextField("content", "The content of the first document", Store.YES), new IntField("intval", 111111, Store.YES), new LongField("longval", 1111111111L, Store.YES))); writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES), new TextField("content", "The content of the second document", Store.YES), new IntField("intval", 222222, Store.YES), new LongField("longval", 222222222L, Store.YES))); writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES), new TextField("content", "The content of the third document", Store.YES), new IntField("intval", 333333, Store.YES), new LongField("longval", 3333333333L, Store.YES))); writer.commit(); writer.close(); IndexReader reader = DirectoryReader.open(luceneDir); // now test for docs if (reader.numDocs() < 3) throw new IOException("amount of returned docs are less than indexed"); else System.out.println("test passed"); searchIndex("content", "third"); } catch (Throwable te) { te.printStackTrace(); } }