Example usage for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB

List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig.setRAMBufferSizeMB.

Prototype

@Override
    public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) 

Source Link

Usage

From source file:org.kew.rmf.core.lucene.LuceneDataLoader.java

License:Open Source License

/**
 * Opens an IndexWriter over {@code directory}, always creating a fresh index
 * (OpenMode.CREATE) and overwriting any existing one. If the first attempt
 * fails (e.g. a corrupt or partially written index), the index directory is
 * wiped from disk and opening is retried once.
 *
 * @return a writer positioned over a freshly created index
 * @throws IOException if the index cannot be opened even after wiping the directory
 */
private IndexWriter openIndex() throws IOException {
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(getLuceneVersion(), luceneAnalyzer);
    indexWriterConfig.setRAMBufferSizeMB(RAM_BUFFER_SIZE);

    // Always create a new index, overwriting any that already exists.
    // (A "reuse existing index" mode was removed; see VCS history if needed.)
    logger.debug("{}: Overwriting existing index, if it exists", configName);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter indexWriter;

    try {
        indexWriter = new IndexWriter(directory, indexWriterConfig);
    } catch (IOException e) {
        logger.warn("Exception while creating index, removing index directory and retrying", e);
        // A damaged on-disk index can make IndexWriter fail even in CREATE
        // mode; delete the directory and retry once.
        File dir = directory.getDirectory();
        if (dir.isDirectory() && dir.listFiles() != null) {
            logger.warn("{}: Wiping existing index directory {}", configName, dir);
            FileUtils.deleteDirectory(dir);
        }
        indexWriter = new IndexWriter(directory, indexWriterConfig);
    }

    return indexWriter;
}

From source file:org.lahab.clucene.server.indexer.Indexer.java

License:Apache License

/**
 * Opens an index writer on the current directory.
 *
 * If the write lock is already held (e.g. left behind by a crashed process),
 * the stale lock is cleared and opening is retried once.
 *
 * @throws CorruptIndexException if the existing index is corrupt
 * @throws IOException on other index I/O failures
 * @throws ParametizerException if the "bufferSize" parameter is missing or invalid
 */
public void open() throws CorruptIndexException, IOException, ParametizerException {
    try {
        _index = new IndexWriter(_directory, newWriterConfig());
    } catch (LockObtainFailedException e) {
        // FIX: previously the lock was cleared but the writer was never
        // re-opened, leaving _index null despite the "trying again" message.
        System.out.println("Lock is taken trying again");
        _directory.clearLock("write.lock");
        _index = new IndexWriter(_directory, newWriterConfig());
    }
    _nbLastCommit = _index.maxDoc();
    _close = false;
}

/** Builds a fresh writer configuration; a new one is used per open attempt. */
private IndexWriterConfig newWriterConfig() throws ParametizerException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    IndexWriterConfig configWriter = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    configWriter.setRAMBufferSizeMB(_params.getDouble("bufferSize"));
    // Flush purely by RAM usage, not document count.
    configWriter.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    configWriter.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    return configWriter;
}

From source file:org.neo4j.index.impl.lucene.legacy.LuceneBatchInserterIndex.java

License:Open Source License

/**
 * Opens an IndexWriter over the Lucene directory backing this index, with a
 * RAM buffer tuned up from the config default. I/O failures close the
 * directory handle and are rethrown unchecked.
 */
private IndexWriter instantiateWriter(File folder) {
    Directory dir = null;
    try {
        dir = LuceneDataSource.getDirectory(folder, identifier);
        IndexWriterConfig config = new IndexWriterConfig(type.analyzer);
        double bufferSizeMb = determineGoodBufferSize(config.getRAMBufferSizeMB());
        config.setRAMBufferSizeMB(bufferSizeMb);
        return new IndexWriter(dir, config);
    } catch (IOException e) {
        // Don't leak the directory handle when writer creation fails.
        IOUtils.closeAllSilently(dir);
        throw new RuntimeException(e);
    }
}

From source file:org.neo4j.index.impl.lucene.LuceneBatchInserterIndex.java

License:Open Source License

/**
 * Creates an IndexWriter for the given index folder, with a RAM buffer tuned
 * up from the config default. IOExceptions are wrapped unchecked.
 */
private IndexWriter instantiateWriter(File directory) {
    try {
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, type.analyzer);
        config.setRAMBufferSizeMB(determineGoodBufferSize(config.getRAMBufferSizeMB()));
        return new IndexWriter(getDirectory(directory, identifier), config);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.neo4j.kernel.api.impl.index.IndexWriterConfigs.java

License:Open Source License

/**
 * Returns a writer configuration for index population: the standard config
 * with doc-count auto-flush disabled and the population RAM buffer size set.
 */
public static IndexWriterConfig population() {
    IndexWriterConfig config = standard();
    config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.setRAMBufferSizeMB(POPULATION_RAM_BUFFER_SIZE_MB);
    return config;
}

From source file:org.ohdsi.usagi.tests.TestLucene.java

License:Apache License

/**
 * Demo: indexes two tiny documents with stored term vectors, then dumps the
 * term vectors, score explanations and search hits for the query "word1".
 *
 * @param args unused
 * @throws IOException if the index folder cannot be written or read
 * @throws ParseException if the hard-coded query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

    // A stored, tokenized field that also records term vectors (terms only,
    // no positions/offsets/payloads).
    FieldType textVectorField = new FieldType();
    textVectorField.setIndexed(true);
    textVectorField.setTokenized(true);
    textVectorField.setStoreTermVectors(true);
    textVectorField.setStoreTermVectorPositions(false);
    textVectorField.setStoreTermVectorPayloads(false);
    textVectorField.setStoreTermVectorOffsets(false);
    textVectorField.setStored(true);
    textVectorField.freeze();

    // Start from an empty index folder.
    File indexFolder = new File(folder);
    if (indexFolder.exists())
        DirectoryUtilities.deleteDir(indexFolder);

    Directory dir = FSDirectory.open(indexFolder);

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new Field("F", "word1 word2 w3 word4", textVectorField));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("F", "word1 word2 w3", textVectorField));
    writer.addDocument(doc);

    writer.close();

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder)));
    // Dump every term of field "F" per document from the stored term vectors.
    for (int i = 0; i < reader.numDocs(); i++) {
        TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null);
        BytesRef text;
        while ((text = termsEnum.next()) != null) {
            System.out.print(text.utf8ToString() + ",");
        }
        System.out.println();
    }
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer);
    Query query = parser.parse("word1");

    Explanation explanation = searcher.explain(query, 0);
    print(explanation);
    System.out.println();
    explanation = searcher.explain(query, 1);
    print(explanation);
    System.out.println();

    TopDocs topDocs = searcher.search(query, 99);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F"));
    }

    // FIX: the reader and directory were previously never closed.
    reader.close();
    dir.close();
}

From source file:org.ohdsi.usagi.UsagiSearchEngine.java

License:Apache License

/**
 * Creates a brand-new main index, deleting any previous index folder first.
 * Any checked exception is rethrown wrapped in a RuntimeException.
 */
public void createNewMainIndex() {
    try {
        File indexFolder = new File(folder + "/" + MAIN_INDEX_FOLDER);
        // Wipe leftovers from a previous run so the index starts from scratch.
        if (indexFolder.exists())
            DirectoryUtilities.deleteDir(indexFolder);

        Directory indexDirectory = FSDirectory.open(indexFolder);

        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_9, new UsagiAnalyzer());
        writerConfig.setOpenMode(OpenMode.CREATE);
        writerConfig.setRAMBufferSizeMB(256.0);
        writer = new IndexWriter(indexDirectory, writerConfig);

    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:org.olat.search.service.indexer.JmsIndexer.java

License:Apache License

/**
 * Builds the writer configuration for this indexer: standard analyzer, log
 * merge policy, configured RAM buffer size and CREATE_OR_APPEND open mode.
 */
public IndexWriterConfig newIndexWriterConfig() {
    Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
    IndexWriterConfig config = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
    config.setMergePolicy(newLogMergePolicy());
    // A larger RAM buffer improves indexing throughput; see the Lucene
    // "how to make indexing faster" guidance (e.g. 48MB).
    config.setRAMBufferSizeMB(ramBufferSizeMB);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    return config;
}

From source file:org.opencms.search.CmsSearchIndex.java

License:Open Source License

/**
 * Creates a new index writer.<p>
 *
 * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
 *
 * @return the created new index writer
 *
 * @throws CmsIndexException in case the writer could not be created
 *
 * @see #getIndexWriter(I_CmsReport, boolean)
 */
protected I_CmsIndexWriter indexWriterCreate(boolean create) throws CmsIndexException {

    IndexWriter indexWriter;
    try {
        // check if the target directory already exists
        File f = new File(m_path);
        if (!f.exists()) {
            // index does not exist yet
            f = f.getParentFile();
            if ((f != null) && !f.exists()) {
                // create the parent folders if required
                f.mkdirs();
            }
            // create must be true if the directory does not exist
            create = true;
        }

        // open file directory for Lucene
        FSDirectory dir = FSDirectory.open(new File(m_path));
        // create Lucene merge policy, applying the optional overrides
        LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        if (m_luceneMaxMergeDocs != null) {
            mergePolicy.setMaxMergeDocs(m_luceneMaxMergeDocs.intValue());
        }
        if (m_luceneMergeFactor != null) {
            mergePolicy.setMergeFactor(m_luceneMergeFactor.intValue());
        }
        if (m_luceneUseCompoundFile != null) {
            mergePolicy.setUseCompoundFile(m_luceneUseCompoundFile.booleanValue());
        }
        // create a new Lucene index configuration
        IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, getAnalyzer());
        // FIX: the configured merge policy was previously never applied to
        // the config, silently discarding the overrides above.
        indexConfig.setMergePolicy(mergePolicy);
        // set the index configuration parameters if required
        if (m_luceneRAMBufferSizeMB != null) {
            indexConfig.setRAMBufferSizeMB(m_luceneRAMBufferSizeMB.doubleValue());
        }
        if (create) {
            indexConfig.setOpenMode(OpenMode.CREATE);
        } else {
            indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        // create the index
        indexWriter = new IndexWriter(dir, indexConfig);
    } catch (Exception e) {
        throw new CmsIndexException(
                Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name), e);
    }
    return new CmsLuceneIndexWriter(indexWriter, this);
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database: walk the configured source
 * directories, (re)index changed files and drop index entries for files that
 * no longer exist.
 *
 * @throws IOException if an error occurs
 */
public void update() throws IOException {
    // Only one update may run at a time for this database.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    RuntimeEnvironment env = RuntimeEnvironment.getInstance();

    // Reset per-run shared state before (re)opening the index.
    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    acceptedNonlocalSymlinks.clear();

    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        /**
         * Most data in OpenGrok is indexed but not stored, so use the best
         * compression on the minority of data that is stored, since it
         * should not have a detrimental impact on overall throughput.
         */
        iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        completer = new PendingFileCompleter();

        // No explicit directories configured: index the whole source root,
        // or just this project's path when a project is set.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }

            if (env.isHistoryEnabled()) {
                // Skip this directory entirely if its history cache cannot
                // be prepared; indexing without it would be inconsistent.
                try {
                    HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                } catch (HistoryException ex) {
                    String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot);
                    LOGGER.log(Level.SEVERE, exmsg, ex);
                    continue;
                }
            }

            dir = Util.fixPathIfWindows(dir);

            String startuid = Util.path2uid(dir, "");
            reader = DirectoryReader.open(indexDirectory); // open existing index
            settings = readAnalysisSettings();
            if (settings == null) {
                settings = new IndexAnalysisSettings();
            }
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (terms != null) {
                    // Position the uid iterator at the first term for this
                    // directory; uidIter drives change/delete detection below.
                    uidIter = terms.iterator();
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                startuid);
                    }
                }

                // The actual indexing happens in indexParallel().

                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                showFileCount(dir, args, elapsed);

                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir));

                // Remove data for the trailing terms that indexDown()
                // did not traverse. These correspond to files that have been
                // removed and have higher ordering than any present files.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }

                markProjectIndexed(project);
            } finally {
                // Always release the reader opened for this directory.
                reader.close();
            }
        }

        // Capture rather than throw, so the finally block below still runs
        // and the writer is closed before we surface the error.
        try {
            finishWriting();
        } catch (IOException e) {
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Don't let a close failure mask an earlier finishWriting() error.
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            synchronized (lock) {
                running = false;
            }
        }
    }

    // Surface the first error captured while finishing or closing.
    if (finishingException != null) {
        throw finishingException;
    }

    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}