List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB
@Override public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)
From source file:org.kew.rmf.core.lucene.LuceneDataLoader.java
License:Open Source License
/** * Opens an IndexWriter, reusing or wiping an existing index according to the configuration. *//*from w w w . j a v a2s. c o m*/ private IndexWriter openIndex() throws IOException { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(getLuceneVersion(), luceneAnalyzer); indexWriterConfig.setRAMBufferSizeMB(RAM_BUFFER_SIZE); // if (getConfig().isReuseIndex()) { // // Reuse the index if it exists, otherwise create a new one. // logger.debug("{}: Reusing existing index, if it exists", configName); // indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // } // else { // Create a new index, overwriting any that already exists. logger.debug("{}: Overwriting existing index, if it exists", configName); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // } IndexWriter indexWriter; try { indexWriter = new IndexWriter(directory, indexWriterConfig); } catch (IOException e) { logger.warn("Exception while creating index, removing index directory and retrying", e); // Try deleting the index directory. File dir = directory.getDirectory(); if (dir.isDirectory() && dir.listFiles() != null) { logger.warn("{}: Wiping existing index directory {}", configName, dir); FileUtils.deleteDirectory(dir); } indexWriter = new IndexWriter(directory, indexWriterConfig); } return indexWriter; }
From source file:org.lahab.clucene.server.indexer.Indexer.java
License:Apache License
/**
 * Opens an index writer on the current directory, recreating the index from scratch.
 * Auto-flush by document count is disabled; flushing is driven solely by the
 * configured RAM buffer size ({@code _params.getDouble("bufferSize")}).
 * If the write lock is held (e.g. stale after a crash), the lock is cleared and
 * opening is retried once.
 *
 * @throws CorruptIndexException if the existing index is corrupt
 * @throws IOException on any other I/O failure
 * @throws ParametizerException if the "bufferSize" parameter cannot be read
 */
public void open() throws CorruptIndexException, IOException, ParametizerException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    IndexWriterConfig configWriter = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    configWriter.setRAMBufferSizeMB(_params.getDouble("bufferSize"));
    configWriter.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    configWriter.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        _index = new IndexWriter(_directory, configWriter);
    } catch (LockObtainFailedException e) {
        // BUG FIX: the original printed "trying again" and cleared the stale lock,
        // but never actually retried — leaving _index null and the state flags unset.
        System.out.println("Lock is taken trying again");
        _directory.clearLock("write.lock");
        _index = new IndexWriter(_directory, configWriter);
    }
    _nbLastCommit = _index.maxDoc();
    _close = false;
}
From source file:org.neo4j.index.impl.lucene.legacy.LuceneBatchInserterIndex.java
License:Open Source License
/**
 * Creates an {@link IndexWriter} over the index stored in the given folder.
 * The RAM buffer is sized via {@code determineGoodBufferSize}, starting from
 * the config's default value. On failure the directory handle is closed
 * (silently) so it is not leaked, and the error is rethrown unchecked.
 */
private IndexWriter instantiateWriter(File folder) {
    Directory indexDir = null;
    try {
        indexDir = LuceneDataSource.getDirectory(folder, identifier);
        IndexWriterConfig config = new IndexWriterConfig(type.analyzer);
        double bufferMb = determineGoodBufferSize(config.getRAMBufferSizeMB());
        config.setRAMBufferSizeMB(bufferMb);
        return new IndexWriter(indexDir, config);
    } catch (IOException e) {
        // Don't leak the directory handle when writer creation fails.
        IOUtils.closeAllSilently(indexDir);
        throw new RuntimeException(e);
    }
}
From source file:org.neo4j.index.impl.lucene.LuceneBatchInserterIndex.java
License:Open Source License
private IndexWriter instantiateWriter(File directory) { try {//from ww w. ja v a 2 s.co m IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, type.analyzer); writerConfig.setRAMBufferSizeMB(determineGoodBufferSize(writerConfig.getRAMBufferSizeMB())); IndexWriter writer = new IndexWriter(getDirectory(directory, identifier), writerConfig); return writer; } catch (IOException e) { throw new RuntimeException(e); } }
From source file:org.neo4j.kernel.api.impl.index.IndexWriterConfigs.java
License:Open Source License
/**
 * Returns an {@link IndexWriterConfig} tuned for index population:
 * the standard config with document-count auto-flush disabled, so flushing
 * is governed only by the population RAM buffer size.
 */
public static IndexWriterConfig population() {
    IndexWriterConfig config = standard();
    config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.setRAMBufferSizeMB(POPULATION_RAM_BUFFER_SIZE_MB);
    return config;
}
From source file:org.ohdsi.usagi.tests.TestLucene.java
License:Apache License
public static void main(String[] args) throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); //Analyzer analyzer = new UsagiAnalyzer(); FieldType textVectorField = new FieldType(); textVectorField.setIndexed(true);//from www. j av a 2s. c om textVectorField.setTokenized(true); textVectorField.setStoreTermVectors(true); textVectorField.setStoreTermVectorPositions(false); textVectorField.setStoreTermVectorPayloads(false); textVectorField.setStoreTermVectorOffsets(false); textVectorField.setStored(true); textVectorField.freeze(); File indexFolder = new File(folder); if (indexFolder.exists()) DirectoryUtilities.deleteDir(indexFolder); Directory dir = FSDirectory.open(indexFolder); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new Field("F", "word1 word2 w3 word4", textVectorField)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("F", "word1 word2 w3", textVectorField)); writer.addDocument(doc); writer.close(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder))); for (int i = 0; i < reader.numDocs(); i++) { TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null); BytesRef text; while ((text = termsEnum.next()) != null) { System.out.print(text.utf8ToString() + ","); } System.out.println(); } IndexSearcher searcher = new IndexSearcher(reader); // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); // mlt.setMinTermFreq(0); // mlt.setMinDocFreq(0); // mlt.setMaxDocFreq(9999); // mlt.setMinWordLen(0); // mlt.setMaxWordLen(9999); // mlt.setMaxDocFreqPct(100); // mlt.setMaxNumTokensParsed(9999); // mlt.setMaxQueryTerms(9999); // mlt.setStopWords(null); // mlt.setFieldNames(new String[] { "F" }); // mlt.setAnalyzer(new UsagiAnalyzer()); // Query query = mlt.like("F", new 
StringReader("Systolic blood pressure")); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer); Query query = parser.parse("word1"); Explanation explanation = searcher.explain(query, 0); print(explanation); System.out.println(); explanation = searcher.explain(query, 1); print(explanation); System.out.println(); TopDocs topDocs = searcher.search(query, 99); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F")); } }
From source file:org.ohdsi.usagi.UsagiSearchEngine.java
License:Apache License
public void createNewMainIndex() { try {// w ww .ja v a 2 s. c o m File indexFolder = new File(folder + "/" + MAIN_INDEX_FOLDER); if (indexFolder.exists()) DirectoryUtilities.deleteDir(indexFolder); Directory dir = FSDirectory.open(indexFolder); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, new UsagiAnalyzer()); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); writer = new IndexWriter(dir, iwc); } catch (Exception e) { throw new RuntimeException(e); } }
From source file:org.olat.search.service.indexer.JmsIndexer.java
License:Apache License
public IndexWriterConfig newIndexWriterConfig() { Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer); indexWriterConfig.setMergePolicy(newLogMergePolicy()); indexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB);// for better performance set to 48MB (see lucene docu 'how to make indexing faster") indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); return indexWriterConfig; }
From source file:org.opencms.search.CmsSearchIndex.java
License:Open Source License
/** * Creates a new index writer.<p>// w w w.j a va 2 s.c o m * * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated * * @return the created new index writer * * @throws CmsIndexException in case the writer could not be created * * @see #getIndexWriter(I_CmsReport, boolean) */ protected I_CmsIndexWriter indexWriterCreate(boolean create) throws CmsIndexException { IndexWriter indexWriter; try { // check if the target directory already exists File f = new File(m_path); if (!f.exists()) { // index does not exist yet f = f.getParentFile(); if ((f != null) && !f.exists()) { // create the parent folders if required f.mkdirs(); } // create must be true if the directory does not exist create = true; } // open file directory for Lucene FSDirectory dir = FSDirectory.open(new File(m_path)); // create Lucene merge policy LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); if (m_luceneMaxMergeDocs != null) { mergePolicy.setMaxMergeDocs(m_luceneMaxMergeDocs.intValue()); } if (m_luceneMergeFactor != null) { mergePolicy.setMergeFactor(m_luceneMergeFactor.intValue()); } if (m_luceneUseCompoundFile != null) { mergePolicy.setUseCompoundFile(m_luceneUseCompoundFile.booleanValue()); } // create a new Lucene index configuration IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, getAnalyzer()); // set the index configuration parameters if required if (m_luceneRAMBufferSizeMB != null) { indexConfig.setRAMBufferSizeMB(m_luceneRAMBufferSizeMB.doubleValue()); } if (create) { indexConfig.setOpenMode(OpenMode.CREATE); } else { indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); } // create the index indexWriter = new IndexWriter(dir, indexConfig); } catch (Exception e) { throw new CmsIndexException( Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name), e); } return new CmsLuceneIndexWriter(indexWriter, this); }
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
/**
 * Updates the content of this index database: opens the writer in
 * CREATE_OR_APPEND mode, walks the configured directories, indexes files in
 * parallel, and removes index entries for files that no longer exist.
 *
 * @throws IOException if the indexer is already running or an I/O error occurs
 */
public void update() throws IOException {
    // Guard against concurrent runs; 'running' is reset in the outer finally below.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();

    // Reset all per-run state.
    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    acceptedNonlocalSymlinks.clear();

    // First exception raised while finishing/closing; rethrown at the very end
    // so cleanup always completes.
    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        /**
         * Most data in OpenGrok is indexed but not stored, so use the best
         * compression on the minority of data that is stored, since it
         * should not have a detrimental impact on overall throughput.
         */
        iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        completer = new PendingFileCompleter();

        // No explicit directories configured: index the whole source root
        // (no project) or this project's path.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }

            if (env.isHistoryEnabled()) {
                try {
                    HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                } catch (HistoryException ex) {
                    // Skip this directory rather than aborting the whole update.
                    String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot);
                    LOGGER.log(Level.SEVERE, exmsg, ex);
                    continue;
                }
            }

            dir = Util.fixPathIfWindows(dir);

            String startuid = Util.path2uid(dir, "");
            reader = DirectoryReader.open(indexDirectory); // open existing index
            settings = readAnalysisSettings();
            if (settings == null) {
                settings = new IndexAnalysisSettings();
            }
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader); //reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (terms != null) {
                    uidIter = terms.iterator();
                    // Position the uid iterator at the first term for this directory.
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                startuid);
                    }
                }

                // The actual indexing happens in indexParallel().
                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                showFileCount(dir, args, elapsed);

                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir));

                // Remove data for the trailing terms that indexDown()
                // did not traverse. These correspond to files that have been
                // removed and have higher ordering than any present files.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }

                markProjectIndexed(project);
            } finally {
                reader.close();
            }
        }

        try {
            finishWriting();
        } catch (IOException e) {
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Remember the first close failure but keep cleaning up.
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            synchronized (lock) {
                running = false;
            }
        }
    }

    if (finishingException != null) {
        throw finishingException;
    }

    // Only optimize / stamp the index when the run completed and actually changed something.
    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}