List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB
@Override public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)
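Before the collected examples, here is a minimal, self-contained sketch of the call, written against Lucene 4.x to match most of the snippets below. The class name and index path are made up for illustration; setRAMBufferSizeMB sets the amount of RAM (in MB) that IndexWriter may buffer before flushing a new segment (the default is 16 MB).

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class RamBufferExample {
        public static void main(String[] args) throws IOException {
            // Hypothetical index location; adjust to your environment.
            Directory dir = FSDirectory.open(new File("/tmp/example-index"));
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
                    new StandardAnalyzer(Version.LUCENE_47));
            // Flush to a new segment once buffered documents use roughly 64 MB of RAM
            // (the default is 16 MB); a larger buffer generally means fewer, larger flushes.
            config.setRAMBufferSizeMB(64.0);
            IndexWriter writer = new IndexWriter(dir, config);
            // ... add documents ...
            writer.close();
        }
    }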
From source file: edu.cmu.geolocator.io.GetWriter.java
License: Apache License
    public static IndexWriter getIndexWriter(String indexdirectory, double buffersize) throws IOException {
        Directory dir;
        if (OSUtil.isWindows())
            dir = FSDirectory.open(new File(indexdirectory));
        else
            dir = NIOFSDirectory.open(new File(indexdirectory));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        config.setRAMBufferSizeMB(buffersize);
        LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
        mergePolicy.setMergeFactor(3);
        config.setMergePolicy(mergePolicy);
        IndexWriter writer = new IndexWriter(dir, config);
        return writer;
    }
From source file: edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.plain.PlainCorpusBuilder.java
License: Open Source License
    private void initializeIndex(String index) {
        final File docDir = new File(index);
        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + docDir.getAbsolutePath() + "'...");
            Directory dir = FSDirectory.open(new File(index));
            Analyzer analyzer = new AnalyzerSpanish(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            iwc.setRAMBufferSizeMB(1024.0);
            writer = new IndexWriter(dir, iwc);
            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }
From source file: edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.wikipedia.WikiCorpusBuilder.java
License: Open Source License
    private void initializeIndex(String index) {
        final File docDir = new File(index);
        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + docDir.getAbsolutePath() + "'...");
            Directory dir = FSDirectory.open(new File(index));
            Analyzer analyzer = new AnalyzerEnglish(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            iwc.setRAMBufferSizeMB(1024.0);
            writer = new IndexWriter(dir, iwc);
            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }
From source file: edu.rpi.tw.linkipedia.search.indexing.EntityIndexer.java
License: Open Source License
    public void createIndex() {
        try {
            Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
            PayloadEncoder encoder = new FloatEncoder();
            EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);
            Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
            myAnalyzerMap.put("related_object", entropyAnalyzer);
            myAnalyzerMap.put("label", entropyAnalyzer);
            myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
            myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
            PerFieldAnalyzerWrapper MyAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, MyAnalyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            iwc.setRAMBufferSizeMB(4096);
            iwc.setMaxThreadStates(36);
            iwc.setSimilarity(new MySimilarity());
            Directory dir = FSDirectory.open(new File(indexDirectory));
            IndexWriter writer = new IndexWriter(dir, iwc);
            System.out.println("Indexing to directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
            writer.close();
            System.out.println("Finished Indexing");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
From source file: edu.rpi.tw.linkipedia.search.indexing.EntityIndexUpdater.java
License: Open Source License
    public void updateIndex() {
        try {
            Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
            PayloadEncoder encoder = new FloatEncoder();
            EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);
            Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
            myAnalyzerMap.put("related_object", entropyAnalyzer);
            myAnalyzerMap.put("label", entropyAnalyzer);
            myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
            myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
            PerFieldAnalyzerWrapper MyAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, MyAnalyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            iwc.setRAMBufferSizeMB(4096);
            iwc.setMaxThreadStates(36);
            iwc.setSimilarity(new MySimilarity());
            Directory dir = FSDirectory.open(new File(indexDirectory));
            IndexWriter writer = new IndexWriter(dir, iwc);
            System.out.println("Update directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
            writer.close();
            System.out.println("Finished Updating");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
From source file: edu.udel.ece.infolab.btc.Indexing.java
License: Apache License
    /**
     * Create an index writer that uses a #TupleAnalyzer on the triples fields with
     * a tokenization of the URI's localname, and the default #WhitespaceAnalyzer
     * on the others.
     * @param dir
     * @return
     * @throws IOException
     */
    @SuppressWarnings("deprecation")
    private IndexWriter initializeIndexWriter(final Directory dir) throws IOException {
        final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
        final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
        final TupleAnalyzer tuple = new TupleAnalyzer(new StandardAnalyzer(Version.LUCENE_31));
        tuple.setURINormalisation(URINormalisation.LOCALNAME);
        fieldAnalyzers.put(OUTGOING_TRIPLE, tuple);
        fieldAnalyzers.put(INCOMING_TRIPLE, tuple);
        final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31,
                new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers));
        // Disable compound file
        ((LogMergePolicy) config.getMergePolicy()).setUseCompoundFile(false);
        // Increase merge factor to 20 - more adapted to batch creation
        ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(20);
        config.setRAMBufferSizeMB(256);
        config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        final IndexWriter writer = new IndexWriter(dir, config);
        writer.setMaxFieldLength(Integer.MAX_VALUE);
        return writer;
    }
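The example above pairs setRAMBufferSizeMB(256) with setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH), so segment flushes are driven by RAM usage alone rather than by a buffered-document count. A minimal sketch of that flush-by-RAM pattern, written here against Lucene 4.x for consistency with the other snippets (the analyzer and version are illustrative assumptions, not taken from this source file):

    // Same imports as the sketch near the top of this page (IndexWriterConfig,
    // StandardAnalyzer, Version). Disable the document-count flush trigger so
    // only the 256 MB RAM buffer decides when a new segment is written.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
            new StandardAnalyzer(Version.LUCENE_47));
    config.setRAMBufferSizeMB(256.0);
    config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);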
From source file: edu.utsa.sifter.Indexer.java
License: Apache License
    static IndexWriter getIndexWriter(final String path, final String stopwords, final SifterConfig conf)
            throws IOException {
        Directory dir = FSDirectory.open(new File(path));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44, getStopList(stopwords));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        iwc.setMaxThreadStates(conf.THREAD_POOL_SIZE);
        IndexWriter writer = new IndexWriter(dir, iwc);
        return writer;
    }
From source file: edu.utsa.sifter.som.MainSOM.java
License: Apache License
    IndexWriter createWriter(final File somIdx, final SifterConfig conf) throws CorruptIndexException, IOException {
        Directory dir = FSDirectory.open(somIdx);
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        IndexWriter writer = new IndexWriter(dir, iwc);
        return writer;
    }
From source file: edu.virginia.cs.index.AnswerIndexer.java
    /**
     * Creates the initial index files on disk
     *
     * @param indexPath
     * @return
     * @throws IOException
     */
    private static IndexWriter setupIndex(String indexPath) throws IOException {
        Analyzer analyzer = new SpecialAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setRAMBufferSizeMB(2048.0);
        FSDirectory dir = FSDirectory.open(new File(indexPath));
        IndexWriter writer = new IndexWriter(dir, config);
        return writer;
    }
From source file: edu.virginia.cs.index.PostLinkIndexer.java
    /**
     * Creates the initial index files on disk
     *
     * @param indexPath
     * @return
     * @throws IOException
     */
    private static IndexWriter setupIndex(String indexPath) throws IOException {
        Analyzer analyzer = new SpecialAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setRAMBufferSizeMB(2048.0);
        FSDirectory dir;
        IndexWriter writer = null;
        dir = FSDirectory.open(new File(indexPath));
        writer = new IndexWriter(dir, config);
        return writer;
    }