List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
/**
 * Builds the Lucene index located at {@code INDEX_PATH + path} from the given rows.
 *
 * <p>The writer is opened with {@link OpenMode#CREATE}. Every row becomes one document with a
 * stored integer field {@code id} and a stored text field {@code indexedContent} that holds the
 * row's title and content joined by {@code SEPARATOR}. Failures are logged and swallowed; nothing
 * is propagated to the caller.
 *
 * @param path     sub-path appended to {@code INDEX_PATH} to locate the index directory
 * @param id       key under which each row stores its numeric identifier
 * @param title    key under which each row stores its title
 * @param content  key under which each row stores its body text
 * @param dataList rows to index
 */
private static void generateIndex(String path, String id, String title, String content,
        List<Map<String, String>> dataList) {
    try {
        Analyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE);
        // try-with-resources closes the writer first and then the directory, also when a row
        // fails part-way through (e.g. a non-numeric id), so the writer is never left open.
        try (Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path));
                IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
            for (Map<String, String> data : dataList) {
                Document document = new Document();
                document.add(new IntField("id", Integer.parseInt(data.get(id)), Field.Store.YES));
                document.add(new TextField("indexedContent",
                        data.get(title) + SEPARATOR + data.get(content), Field.Store.YES));
                writer.addDocument(document);
                if (logger.isInfoEnabled()) {
                    logger.info("add index for : [" + data.get(title) + "]");
                }
            }
        }
    } catch (Exception e) {
        logger.error("add index failed ...", e);
    }
}
From source file:com.admarketplace.isg.lucene.demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from ww w . j av a2 s . co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
/** * Run the command. See class documentation above for details on * arguments and behavior.//from w w w .ja va2s. c om */ public void run() { System.out.println("start run"); try { File[] files = getLaterFiles(mDistDir); System.out.println("Total files to process: " + files.length); System.out.println("File names: " + java.util.Arrays.asList(files)); // if (mLogger.isDebugEnabled()) // mLogger.debug("File names: " + java.util.Arrays.asList(files)); if (files.length > 0) { MedlineParser parser = new MedlineParser(true); // true = save raw XML Directory fsDir = FSDirectory.open(mIndex); IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, mCodec.getAnalyzer()); iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); iwConf.setRAMBufferSizeMB(RAM_BUF_SIZE); if (sIsBaseline) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(MERGE_FACTOR_HI); iwConf.setMergePolicy(ldmp); } IndexWriter indexWriter = new IndexWriter(fsDir, iwConf); for (File file : files) { System.out.println("processing file: " + file); MedlineIndexer indexer = new MedlineIndexer(indexWriter, mCodec); parser.setHandler(indexer); parseFile(parser, file); indexer.close(); recordFile(indexWriter, file.getName()); System.out.println("completed processing file: " + file); } System.out.println("All files parsed, now optimize index"); indexWriter.forceMerge(1); indexWriter.commit(); indexWriter.close(); } System.out.println("Processing complete."); } catch (Exception e) { // mLogger.warn("Unexpected Exception: "+e.getMessage()); // mLogger.warn("stack trace: "+Logging.logStackTrace(e)); // mLogger.warn("Aborting this run"); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.aliasi.lingmed.medline.OptimizeMedline.java
License:Lingpipe license
/** * Run the command. See class documentation above for details on * arguments and behavior.//from w w w . ja v a2s . c o m */ public void run() { // mLogger.info("start run"); try { Directory fsDir = FSDirectory.open(mIndex); IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter indexWriter = new IndexWriter(fsDir, iwConf); indexWriter.forceMerge(1); indexWriter.commit(); // mLogger.info("Processing complete."); } catch (Exception e) { // mLogger.warn("Unexpected Exception: "+e.getMessage()); // mLogger.warn("stack trace: "+Logging.logStackTrace(e)); // mLogger.warn("Aborting this run"); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java
License:Lingpipe license
/**
 * Manual check of the codec's analyzer: indexes one document carrying two values of the
 * {@code Fields.MESH_MINOR_FIELD} field into an in-memory directory, runs a query for one of the
 * terms and prints the hit count, then prints every token the analyzer emits for a sample string
 * together with its offsets and position increment.
 *
 * @param args ignored
 * @throws Exception if indexing, query parsing, searching, or tokenizing fails
 */
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory ramDir = new org.apache.lucene.store.RAMDirectory();
    Analyzer codecAnalyzer = new MedlineCodec().getAnalyzer();

    // --- index a single two-valued document ---
    org.apache.lucene.index.IndexWriterConfig writerConfig =
            new org.apache.lucene.index.IndexWriterConfig(
                    org.apache.lucene.util.Version.LUCENE_36, codecAnalyzer);
    writerConfig.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    org.apache.lucene.index.IndexWriter writer =
            new org.apache.lucene.index.IndexWriter(ramDir, writerConfig);
    Document sample = new Document();
    sample.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
    sample.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(sample);
    writer.close();

    // --- search it ---
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(
            org.apache.lucene.index.IndexReader.open(ramDir));
    org.apache.lucene.search.Query efgQuery = new org.apache.lucene.queryParser.QueryParser(
            org.apache.lucene.util.Version.LUCENE_36, "foo", codecAnalyzer)
                    .parse(Fields.MESH_MINOR_FIELD + ":efg");
    org.apache.lucene.search.TopDocs topDocs = searcher.search(efgQuery, 1000);
    System.out.println("hits.length()=" + topDocs.scoreDocs.length);

    // --- dump the analyzer's token stream ---
    org.apache.lucene.analysis.TokenStream tokens = codecAnalyzer.tokenStream(
            Fields.MESH_MINOR_FIELD, new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute termAttr = tokens
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsetAttr = tokens
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute posIncrAttr = tokens
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
    while (tokens.incrementToken()) {
        StringBuilder line = new StringBuilder();
        line.append("token=|").append(termAttr.toString()).append("|");
        line.append(" startOffset=").append(offsetAttr.startOffset());
        line.append(" endOffset=").append(offsetAttr.endOffset());
        line.append(" positionIncr=").append(posIncrAttr.getPositionIncrement());
        System.out.println(line.toString());
    }
}
From source file:com.aurel.track.lucene.index.LuceneIndexer.java
License:Open Source License
/** * Initializes an IndexWriter.//from w ww . j av a 2s . co m * It will be called from the following places: * - on system startup workItemWriter should be initialized! * created = false if no reindex at startup * created = true if reindex at startup * - before explicit reIndexing: created = true * - after reindexing: create = false; * During the adding/editing/deleting of index data the IndexWriter should be initialized with created = false! * @param created * @param index */ public static IndexWriter initWriter(boolean created, int index) { Directory indexDirectory = LuceneUtil.getIndexDirectory(index); if (indexDirectory == null) { LOGGER.error("Can not find or create the index directory for workitems"); return null; } Analyzer analyzer = LuceneUtil.getAnalyzer(); if (analyzer == null) { LOGGER.error("Analyzer is null"); return null; } IndexWriter indexWriter = getIndexWriter(index); if (indexWriter != null) { try { //release the lock indexWriter.close(); } catch (IOException e) { LOGGER.error("Closing the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); OpenMode openMode = null; if (created) { openMode = OpenMode.CREATE; } else { openMode = OpenMode.APPEND; } indexWriterConfig.setOpenMode(openMode); indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); } catch (OverlappingFileLockException e) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("OverlappingFileLockException " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } if (!created) { //try again this time with created = true try { //open for create IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(OpenMode.CREATE); //indexWriter = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); 
//close it in order to reopen it for modifications indexWriter.close(); } catch (IOException e1) { LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } //try again this time with created = false try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(OpenMode.APPEND); //indexWriter = new IndexWriter(indexDirectory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); } catch (IOException e1) { LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } else { LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } catch (IOException e) { //tried probably with created = false, when the index structure doesn't exist yet //it is the case when by startup the useLucene is active but reindexOnStartup not //we should try to open the writers with false (for modifications) in order to not to destroy the possible existing index //but when the index doesn't exists yet the opening of the writer with false fails. And this is the case now. 
if (!created) { //try again this time with created = true try { //open for create IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(OpenMode.CREATE); //indexWriter = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); //close it in order to reopen it for modifications indexWriter.close(); } catch (IOException e1) { LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } //try again this time with created = false try { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(OpenMode.APPEND); //indexWriter = new IndexWriter(indexDirectory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); } catch (IOException e1) { LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } else { LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } switch (index) { case LuceneUtil.INDEXES.WORKITEM_INDEX: workItemWriter = indexWriter; break; case LuceneUtil.INDEXES.NOT_LOCALIZED_LIST_INDEX: notLocalizedLookupWriter = indexWriter; break; case LuceneUtil.INDEXES.LOCALIZED_LIST_INDEX: localizedLookupWriter = indexWriter; break; case LuceneUtil.INDEXES.EXTERNAL_LOOKUP_WRITER: externalLookupWriter = indexWriter; break; case LuceneUtil.INDEXES.ATTACHMENT_INDEX: attachmentWriter = indexWriter; break; case LuceneUtil.INDEXES.EXPENSE_INDEX: expenseWriter = indexWriter; break; case LuceneUtil.INDEXES.BUDGET_PLAN_INDEX: budgetPlanWriter = indexWriter; break; case LuceneUtil.INDEXES.LINK_INDEX: linkWriter = indexWriter; break; default: return null; } return 
indexWriter; }
From source file:com.b2international.index.compat.SingleDirectoryIndexImpl.java
License:Apache License
protected void initLucene(final File indexDirectory, final boolean clean) { try {/*from www .j av a 2 s . c o m*/ this.directory = Directories.openFile(indexDirectory.toPath()); final Analyzer analyzer = new ComponentTermAnalyzer(); final IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setOpenMode(clean ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND); config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(config.getIndexDeletionPolicy())); this.writer = new IndexWriter(directory, config); this.writer.commit(); // Create index if it didn't exist this.manager = new SearcherManager(directory, new SearchWarmerFactory()); } catch (final IOException e) { throw new RuntimeException(e.getMessage(), e); } }
From source file:com.baidu.rigel.biplatform.tesseract.isservice.index.service.IndexWriterFactory.java
License:Open Source License
/** * /*from ww w . java 2s.co m*/ * getIndexWriter * * @param idxPath * * @return IndexWriter * @throws IOException * IO */ public static synchronized IndexWriter getIndexWriter(String idxPath) throws IOException { LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "getIndexWriter", "[idxPath:" + idxPath + "]")); IndexWriter indexWriter = null; if (INSTANCE.idxWriterMaps.containsKey(idxPath)) { indexWriter = INSTANCE.idxWriterMaps.get(idxPath); LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "getIndexWriter", "return exist IndexWriter ")); } else { File indexFile = new File(idxPath); Directory directory = FSDirectory.open(indexFile); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_1, new StandardAnalyzer()); indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); indexWriterConfig.setRAMBufferSizeMB(64.0); indexWriterConfig.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); indexWriter = new IndexWriter(directory, indexWriterConfig); INSTANCE.idxWriterMaps.put(idxPath, indexWriter); LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "getIndexWriter", "create new IndexWriter ")); } LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "getIndexWriter", "[idxPath:" + idxPath + "]")); return indexWriter; }
From source file:com.baidu.rigel.biplatform.tesseract.isservice.index.service.IndexWriterFactory.java
License:Open Source License
public static synchronized IndexWriter getIndexWriterWithSingleSlot(String idxPath) throws IOException { LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "getIndexWriter", "[idxPath:" + idxPath + "]")); IndexWriter indexWriter = null;//from ww w. ja v a 2 s .c om Integer maxSlot = 0; if (INSTANCE.idxMaps.containsKey(idxPath)) { maxSlot = INSTANCE.idxMaps.get(idxPath); maxSlot++; } File indexFile = new File(idxPath + File.separator + maxSlot); Directory directory = FSDirectory.open(indexFile); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_1, new StandardAnalyzer()); indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); indexWriterConfig.setRAMBufferSizeMB(48.0); indexWriter = new IndexWriter(directory, indexWriterConfig); INSTANCE.idxMaps.put(idxPath, maxSlot); LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "getIndexWriter", "[idxPath:" + idxPath + "]")); return indexWriter; }
From source file:com.bala.learning.learning.luence.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;/* ww w . j av a 2 s . co m*/ boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }