Example usage for org.apache.lucene.index IndexWriterConfig setMaxBufferedDocs


Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setMaxBufferedDocs.

Prototype

@Override
public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs)
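
Before the project examples below, here is a minimal sketch of typical usage (assuming a Lucene 5.x-style API, where IndexWriterConfig takes only an analyzer, and a placeholder index path). setMaxBufferedDocs sets the document-count flush trigger; it works alongside setRAMBufferSizeMB, and whichever enabled threshold is crossed first causes the buffered documents to be flushed to a new segment. Passing IndexWriterConfig.DISABLE_AUTO_FLUSH disables the count-based trigger, as several of the examples below do.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MaxBufferedDocsSketch {
    public static void main(String[] args) throws Exception {
        // "/tmp/example-index" is a placeholder path for this sketch
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
            // Flush to a new segment every 1000 buffered documents (value must be >= 2)
            iwc.setMaxBufferedDocs(1000);
            // Alternatively, disable the count trigger and flush by RAM usage only:
            // iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            // iwc.setRAMBufferSizeMB(64);
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                // writer.addDocument(...) calls go here; a flush happens
                // whenever an enabled trigger is crossed
            }
        }
    }
}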

Usage

From source file:dk.defxws.fgslucene.OperationsImpl.java

License:Open Source License

private void getIndexWriter(String indexName) throws GenericSearchException {
    if (iw == null) {
        Directory dir;
        try {
            dir = new SimpleFSDirectory(new File(config.getIndexDir(indexName)));
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
        IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_36, getQueryAnalyzer(indexName));
        int maxBufferedDocs = config.getMaxBufferedDocs(indexName);
        if (maxBufferedDocs > 0) {
            iwconfig.setMaxBufferedDocs(maxBufferedDocs);
        }
        int mergeFactor = config.getMergeFactor(indexName);
        if (mergeFactor > 0) {
            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(mergeFactor);
            iwconfig.setMergePolicy(ldmp);
        }
        long defaultWriteLockTimeout = config.getDefaultWriteLockTimeout(indexName);
        if (defaultWriteLockTimeout > 0) {
            IndexWriterConfig.setDefaultWriteLockTimeout(defaultWriteLockTimeout);
        }
        try {
            iw = new IndexWriter(dir, iwconfig);
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
    }
    try {
        docCount = iw.numDocs();
    } catch (Exception e) {
        closeIndexWriter(indexName);
        throw new GenericSearchException("IndexWriter numDocs error indexName=" + indexName + " :\n", e);
    }
    if (logger.isDebugEnabled())
        logger.debug("getIndexWriter indexName=" + indexName + " docCount=" + docCount);
}
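
Note that the writer above is created lazily and cached in iw, and each option is applied only when the per-index configuration supplies a positive value, so unset options fall back to Lucene's defaults. Also note that setDefaultWriteLockTimeout is static: calling it changes the default for every IndexWriterConfig created afterwards, not just this index's.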

From source file:edu.udel.ece.infolab.btc.Indexing.java

License:Apache License

/**
 * Creates an index writer that uses a {@link TupleAnalyzer} on the triple fields,
 * with tokenization of the URI's localname, and the default {@link WhitespaceAnalyzer}
 * on the other fields.
 * @param dir the directory to store the index in
 * @return the configured index writer
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private IndexWriter initializeIndexWriter(final Directory dir) throws IOException {
    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
    final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
    final TupleAnalyzer tuple = new TupleAnalyzer(new StandardAnalyzer(Version.LUCENE_31));
    tuple.setURINormalisation(URINormalisation.LOCALNAME);
    fieldAnalyzers.put(OUTGOING_TRIPLE, tuple);
    fieldAnalyzers.put(INCOMING_TRIPLE, tuple);

    final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31,
            new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers));

    // Disable compound file
    ((LogMergePolicy) config.getMergePolicy()).setUseCompoundFile(false);
    // Increase merge factor to 20 - more adapted to batch creation
    ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(20);

    config.setRAMBufferSizeMB(256);
    config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    final IndexWriter writer = new IndexWriter(dir, config);
    writer.setMaxFieldLength(Integer.MAX_VALUE);
    return writer;
}
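
Here both count-based flush triggers are disabled via DISABLE_AUTO_FLUSH, so the 256 MB RAM buffer alone decides when to flush. Together with the merge factor raised to 20 and compound files switched off, the writer is tuned for batch index creation, as the inline comments note.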

From source file:indexer.LuceneIndexer.java

/**
 * Indexes the files. This method checks the directories and then
 * finishes out after the indexing is complete.
 * @param global Reference to the global class variables and methods.
 * @param createIndex If true, a new index will be created from scratch
 * and the old index will be destroyed.
 */
public static void IndexFiles(Global global, Boolean createIndex) {
    String dataDir = global.dataDir;
    String indexDir = global.indexDir;

    //Verifies that the data directory exists
    if (dataDir == null) {
        System.err.println("Data directory is not accessible, unable to index files.");
        return;
    }

    //Verifies that the data directory is readable
    final Path docDir = Paths.get(dataDir);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        return;
    }

    startTime = new Date();
    try {
        System.out.println("Indexing to directory '" + indexDir + "'...");

        //Setups the analyzer
        Analyzer analyzer;
        try (Directory dir = FSDirectory.open(Paths.get(indexDir))) {

            analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (createIndex) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }
            iwc.setRAMBufferSizeMB(global.RAM_BUFFER_SIZE);
            iwc.setMaxBufferedDocs(global.MAX_BUFFERED_DOCS);

            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(global.MERGE_FACTOR);
            iwc.setMergePolicy(ldmp);

            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                hm.clear();
                indexDocs(writer, docDir, global);

                //Force-merging is a costly operation, so it only runs when scheduled
                if (global.merge) {
                    System.out.println("Starting Merge");
                    writer.forceMerge(1);
                    global.merge = false;
                }
            }
            finishTime = new Date();
            long millis = finishTime.getTime() - startTime.getTime();
            totalTime = String.format("%02dhr %02dmin %02dsec", TimeUnit.MILLISECONDS.toHours(millis),
                    TimeUnit.MILLISECONDS.toMinutes(millis)
                            - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(millis)),
                    TimeUnit.MILLISECONDS.toSeconds(millis)
                            - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)));
            System.out.println("");
            System.out.println("");
            System.out.println("Start Time:          " + global.sdf.format(startTime.getTime()));
            System.out.println("Building List Time:  " + listBuildTime);
            System.out.println("Indexing Time:       " + indexingTime);
            System.out.println("Total Time:          " + totalTime);
            System.out.println("Number of Documents: " + amountOfDocuments);
            System.out.println("Finish Time:         " + global.sdf.format(finishTime.getTime()));
            System.out.println("");
        }
        analyzer.close();
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        log.fatal(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
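
This example sets both flush triggers (RAM buffer size and buffered-document count) from Global settings, so whichever limit is hit first flushes; the LogDocMergePolicy then merges segments by document count using the configured merge factor.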

From source file:io.druid.extension.lucene.LuceneDruidSegment.java

License:Apache License

private static IndexWriter buildRamWriter(RAMDirectory dir, Analyzer analyzer, int maxDocsPerSegment)
        throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // some arbitrary large numbers
    writerConfig.setMaxBufferedDocs(maxDocsPerSegment * 2);
    writerConfig.setRAMBufferSizeMB(5000);
    writerConfig.setUseCompoundFile(false);
    writerConfig.setCommitOnClose(true);
    writerConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    writerConfig.setMergeScheduler(NoMergeScheduler.INSTANCE);
    return new IndexWriter(dir, writerConfig);
}
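
This writer is deliberately configured never to flush or merge on its own: the buffered-document limit is set well above the expected segment size, merging is disabled through NoMergePolicy and NoMergeScheduler, and NoDeletionPolicy keeps every commit point, leaving the segment lifecycle entirely to the calling code.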

From source file:io.jpress.searcher.LuceneSearcher.java

License:LGPL

public IndexWriter createIndexWriter() throws IOException {
    if (mIndexFilePath == null) {
        throw new NullPointerException("please invoke init() method first!");
    }

    Analyzer analyzer = new JcsegAnalyzer5X(JcsegTaskConfig.COMPLEX_MODE);

    // Optional: tune the Jcseg segmenter configuration
    JcsegAnalyzer5X jcseg = (JcsegAnalyzer5X) analyzer;
    JcsegTaskConfig config = jcseg.getTaskConfig();
    // Append CJK synonyms; also needs jcseg.loadsyn=1 in jcseg.properties
    config.setAppendCJKSyn(true);
    // Append CJK pinyin; also needs jcseg.loadpinyin=1 in jcseg.properties
    // (see com.webssky.jcseg.core.JcsegTaskConfig for more options)
    config.setAppendCJKPinyin(true);

    Directory fsDirectory = FSDirectory.open(Paths.get(mIndexFilePath));
    IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer);
    indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexConfig.setMaxBufferedDocs(1000);
    IndexWriter indexWriter = new IndexWriter(fsDirectory, indexConfig);
    return indexWriter;
}
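
A fixed buffer of 1000 documents keeps flushes predictable during indexing. The writer is returned still open, so closing it is the caller's responsibility.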

From source file:net.sf.logsaw.index.internal.ARunWithIndexWriter.java

License:Open Source License

/**
 * Opens a Lucene index writer, executes the callback method and then closes the writer.
 * @param log the log resource, may be <code>null</code>
 * @param analyzer the Lucene analyzer to set on the index writer
 * @param matchVersion the Lucene match version
 * @return any object or <code>null</code>
 * @throws CoreException if an <strong>expected</strong> error occurred
 */
protected final T runWithIndexWriter(ILogResource log, Analyzer analyzer, Version matchVersion)
        throws CoreException {
    logger.info("Opening index writer for '" + log.getName() + "'..."); //$NON-NLS-1$ //$NON-NLS-2$
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(IndexPlugin.getDefault().getIndexFile(log));
        LogMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMergeFactor(30);
        IndexWriterConfig cfg = new IndexWriterConfig(matchVersion, analyzer);
        cfg.setMaxBufferedDocs(1000);
        cfg.setMergePolicy(mp);
        writer = new IndexWriter(dir, cfg);
        try {
            return doRunWithIndexWriter(writer, log);
        } finally {
            logger.info("Closing index writer for '" + log.getName() + "'..."); //$NON-NLS-1$ //$NON-NLS-2$
            writer.close();
        }
    } catch (CoreException e) {
        // Rethrow original CoreException
        throw e;
    } catch (Exception e) {
        // Unexpected exception; wrap with CoreException
        throw new CoreException(new Status(IStatus.ERROR, IndexPlugin.PLUGIN_ID,
                NLS.bind(Messages.LuceneIndexService_error_failedToUpdateIndex,
                        new Object[] { log.getName(), e.getLocalizedMessage() }),
                e));
    }
}
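
The 1000-document flush threshold is paired with a LogByteSizeMergePolicy whose merge factor is raised to 30, favoring indexing throughput over segment count, and the finally block guarantees the writer is closed even when the callback fails.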

From source file:org.ala.lucene.Autocompleter.java

License:Open Source License

@SuppressWarnings("unchecked")
public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex)
        throws CorruptIndexException, IOException {
    // build a dictionary (from the spell package)
    IndexReader sourceReader = IndexReader.open(sourceDirectory);

    LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

    // code from
    // org.apache.lucene.search.spell.SpellChecker.indexDictionary(Dictionary)
    IndexWriter.unlock(autoCompleteDirectory);

    // use a custom analyzer so we can do EdgeNGramFiltering
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() {
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
            TokenStream result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, src);
            result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
            result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result,
                    new CharArraySet(SolrUtils.BIE_LUCENE_VERSION,
                            new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true));
            result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
            return new TokenStreamComponents(src, result);
        }
    });
    if (createNewIndex) {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    indexWriterConfig.setMaxBufferedDocs(150);
    IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig);
    //        writer.setMergeFactor(300);

    // go through every word, storing the original word (incl. n-grams)
    // and the number of times it occurs
    Map<String, Integer> wordsMap = new HashMap<String, Integer>();

    Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
    while (iter.hasNext()) {
        String word = iter.next();

        int len = word.length();
        if (len < 3) {
            continue; // too short, so we bail; "too long" is fine
        }

        if (wordsMap.containsKey(word)) {
            throw new IllegalStateException("This should never happen in Lucene 2.3.2");
            // wordsMap.put(word, wordsMap.get(word) + 1);
        } else {
            // use the number of documents this word appears in
            wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
        }
    }

    for (String word : wordsMap.keySet()) {
        // ok index the word
        Document doc = new Document();
        doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
        doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed
        doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO,
                Field.Index.NOT_ANALYZED)); // count

        writer.addDocument(doc);
    }

    sourceReader.close();

    // close writer
    writer.forceMerge(1);
    writer.close();

    // re-open our reader
    reOpenReader();
}
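
The custom analyzer ends its chain with an EdgeNGramTokenFilter (front side, lengths 1 to 20), so each dictionary word is indexed under all of its prefixes and autocompletion becomes a plain term lookup; setMaxBufferedDocs(150) merely keeps flushes frequent during the rebuild.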

From source file:org.apache.nutch.indexwriter.lucene.LuceneWriter.java

License:Apache License

public void open(JobConf job, String name) throws IOException {
    this.fs = FileSystem.get(job);
    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt()));
    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_2,
            new SmartChineseAnalyzer());
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    mergePolicy.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE));

    indexWriterConfig.setMergePolicy(mergePolicy);
    indexWriterConfig.setUseCompoundFile(false);
    indexWriterConfig.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
    indexWriterConfig.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writer = new org.apache.lucene.index.IndexWriter(
            FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), indexWriterConfig);

    /*
     * addFieldOptions("title", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("url", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("content", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("lang", STORE.YES, INDEX.UNTOKENIZED, VECTOR.NO, job);
     */

    processOptions(job);
}
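
The flush threshold is read from the legacy property indexer.minMergeDocs; the name appears to date from early Lucene versions, where this setting was called minMergeDocs before being renamed maxBufferedDocs.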

From source file:org.apache.solr.search.function.TestOrdValues.java

License:Apache License

protected static void createIndex(boolean doMultiSegment) throws Exception {
    if (VERBOSE) {
        System.out.println("TEST: setUp");
    }
    // prepare a small index with just a few documents.
    dir = newDirectory();
    anlzr = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy());
    if (doMultiSegment) {
        iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7));
    }
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
    int remaining = N_DOCS;
    boolean done[] = new boolean[N_DOCS];
    int i = 0;
    while (remaining > 0) {
        if (done[i]) {
            throw new Exception(
                    "to set up this test correctly N_DOCS=" + N_DOCS + " must be prime and greater than 2!");
        }
        addDoc(iw, i);
        done[i] = true;
        i = (i + 4) % N_DOCS;
        remaining--;
    }
    if (!doMultiSegment) {
        if (VERBOSE) {
            System.out.println("TEST: setUp full merge");
        }
        iw.forceMerge(1);
    }
    iw.close();
    if (VERBOSE) {
        System.out.println("TEST: setUp done close");
    }
}
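
When a multi-segment index is wanted, maxBufferedDocs is randomized to a small value (2 to 7), forcing a flush every few documents so the index ends up with several segments; otherwise forceMerge(1) collapses it to a single segment.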

From source file:org.apache.solr.update.SolrIndexConfig.java

License:Apache License

public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
    // so that we can update the analyzer on core reload, we pass null
    // for the default analyzer, and explicitly pass an analyzer on 
    // appropriate calls to IndexWriter

    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, null);
    if (maxBufferedDocs != -1)
        iwc.setMaxBufferedDocs(maxBufferedDocs);

    if (ramBufferSizeMB != -1)
        iwc.setRAMBufferSizeMB(ramBufferSizeMB);

    if (termIndexInterval != -1)
        iwc.setTermIndexInterval(termIndexInterval);

    if (writeLockTimeout != -1)
        iwc.setWriteLockTimeout(writeLockTimeout);

    iwc.setSimilarity(schema.getSimilarity());
    iwc.setMergePolicy(buildMergePolicy(schema));
    iwc.setMergeScheduler(buildMergeScheduler(schema));
    iwc.setInfoStream(infoStream);

    // do this after buildMergePolicy since the backcompat logic 
    // there may modify the effective useCompoundFile
    iwc.setUseCompoundFile(getUseCompoundFile());

    if (maxIndexingThreads != -1) {
        iwc.setMaxThreadStates(maxIndexingThreads);
    }

    if (mergedSegmentWarmerInfo != null) {
        // TODO: add infostream -> normal logging system (there is an issue somewhere)
        IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                IndexReaderWarmer.class, null, new Class[] { InfoStream.class },
                new Object[] { iwc.getInfoStream() });
        iwc.setMergedSegmentWarmer(warmer);
    }

    return iwc;
}
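
Solr treats -1 as "not configured": each setter runs only when solrconfig.xml supplied a value, so unset options keep Lucene's defaults.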