Example usage for org.apache.lucene.index LogMergePolicy setMergeFactor

List of usage examples for org.apache.lucene.index LogMergePolicy setMergeFactor

Introduction

In this page you can find the example usage for org.apache.lucene.index LogMergePolicy setMergeFactor.

Prototype

public void setMergeFactor(int mergeFactor) 

Source Link

Document

Determines how often segment indices are merged by addDocument().

Usage

From source file:com.edgenius.wiki.search.lucene.SimpleIndexFactory.java

License:Open Source License

/**
 * Assembles the {@link IndexWriterConfig} used when opening index writers,
 * applying this factory's buffering, term-interval, lock-timeout and
 * merge-policy settings.
 *
 * @return a fully configured {@link IndexWriterConfig}
 */
private IndexWriterConfig getIndexWriterConfig() {
    // Build the merge policy first, then attach it to the writer config.
    LogMergePolicy mergePolicy = new LogDocMergePolicy();
    mergePolicy.setUseCompoundFile(useCompoundFile);
    mergePolicy.setMaxMergeDocs(maxMergeDocs);
    mergePolicy.setMergeFactor(mergeFactor);

    IndexWriterConfig conf = new IndexWriterConfig(LuceneConfig.VERSION, analyzerProvider.getIndexAnalyzer());
    conf.setMaxBufferedDocs(maxBufferedDocs);
    conf.setTermIndexInterval(termIndexInterval);
    conf.setWriteLockTimeout(writeLockTimeout);
    conf.setMergePolicy(mergePolicy);
    return conf;
}

From source file:net.sf.logsaw.index.internal.ARunWithIndexWriter.java

License:Open Source License

/**
 * Opens a Lucene index writer, executes the callback method and then closes the writer.
 * @param log the log resource, may be <code>null</code>
 * @param analyzer the Lucene analyzer to set on the index writer
 * @param matchVersion the Lucene match version
 * @return any object or <code>null</code>
 * @throws CoreException if an <strong>expected</strong> error occurred
 */
protected final T runWithIndexWriter(ILogResource log, Analyzer analyzer, Version matchVersion)
        throws CoreException {
    // The Javadoc permits a null log, but the original dereferenced it for
    // logging and error messages; resolve the name once, null-safely.
    String logName = log != null ? log.getName() : "null"; //$NON-NLS-1$
    logger.info("Opening index writer for '" + logName + "'..."); //$NON-NLS-1$ //$NON-NLS-2$
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(IndexPlugin.getDefault().getIndexFile(log));
        // Merge policy must be installed on the config before the writer is created
        LogMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMergeFactor(30);
        IndexWriterConfig cfg = new IndexWriterConfig(matchVersion, analyzer);
        cfg.setMaxBufferedDocs(1000);
        cfg.setMergePolicy(mp);
        writer = new IndexWriter(dir, cfg);
        try {
            return doRunWithIndexWriter(writer, log);
        } finally {
            // Always close the writer, even when the callback throws
            logger.info("Closing index writer for '" + logName + "'..."); //$NON-NLS-1$ //$NON-NLS-2$
            writer.close();
        }
    } catch (CoreException e) {
        // Rethrow original CoreException
        throw e;
    } catch (Exception e) {
        // Unexpected exception; wrap with CoreException
        throw new CoreException(new Status(IStatus.ERROR, IndexPlugin.PLUGIN_ID,
                NLS.bind(Messages.LuceneIndexService_error_failedToUpdateIndex,
                        new Object[] { logName, e.getLocalizedMessage() }),
                e));
    }
}

From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java

License:Apache License

/**
 * Removes the index document for the given product, matching on its
 * {@code product_id} term.
 *
 * @param product the product whose index document should be deleted
 * @throws CatalogException if the index cannot be updated
 */
private synchronized void removeProductDocument(Product product) throws CatalogException {

    try {
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        // Log through the class logger instead of printing a stack trace to
        // stderr; the delete below does not depend on this reader.
        LOG.log(Level.WARNING, "Unable to open index reader: " + e.getMessage(), e);
    }
    try {
        LOG.log(Level.FINE,
                "LuceneCatalog: remove document from index for product: [" + product.getProductId() + "]");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        IndexWriter writer = new IndexWriter(indexDir, config);
        try {
            writer.deleteDocuments(new Term("product_id", product.getProductId()));
        } finally {
            // Close the writer even when the delete fails, so the index
            // write lock is always released.
            writer.close();
        }
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Exception removing product: [" + product.getProductName()
                + "] from index: Message: " + e.getMessage());
        throw new CatalogException(e.getMessage(), e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
                // best-effort close; nothing useful to do on failure
            }
        }
    }
}

From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java

License:Apache License

/**
 * Adds the given complete product (product plus metadata) to the Lucene
 * index as a single document.
 *
 * @param cp the complete product to index
 * @throws CatalogException if indexing fails
 */
private synchronized void addCompleteProductToIndex(CompleteProduct cp) throws CatalogException {
    IndexWriter writer = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        writer = new IndexWriter(indexDir, config);

        Document doc = toDoc(cp.getProduct(), cp.getMetadata());
        writer.addDocument(doc);
        // TODO: determine a better way to optimize the index
    } catch (Exception e) {
        LOG.log(Level.WARNING, "Unable to index product: [" + cp.getProduct().getProductName() + "]: Message: "
                + e.getMessage(), e);
        throw new CatalogException("Unable to index product: [" + cp.getProduct().getProductName()
                + "]: Message: " + e.getMessage(), e);
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            // Deliberately best-effort, but report through the class logger
            // rather than System.out.
            LOG.log(Level.WARNING, "Failed to close index writer: " + e.getLocalizedMessage(), e);
        }
    }

}

From source file:org.apache.oodt.cas.filemgr.tools.OptimizeLuceneCatalog.java

License:Apache License

/**
 * Optimizes the Lucene catalog index and logs how long the operation took.
 * NOTE: the actual optimize call is disabled pending a decision on forced
 * merges (see the linked article below).
 */
public void doOptimize() {
    IndexWriter writer = null;

    try {
        // Configure the merge policy BEFORE constructing the writer;
        // the original set it afterwards, so it never took effect.
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(this.mergeFactor);
        config.setMergePolicy(lmp);

        writer = new IndexWriter(reader.directory(), config);

        long timeBefore = System.currentTimeMillis();
        //TODO http://blog.trifork.com/2011/11/21/simon-says-optimize-is-bad-for-you/
        //writer.optimize();
        long timeAfter = System.currentTimeMillis();
        double numSeconds = ((timeAfter - timeBefore) * 1.0) / DOUBLE;
        LOG.log(Level.INFO,
                "LuceneCatalog: [" + this.catalogPath + "] optimized: took: [" + numSeconds + "] seconds");
    } catch (IOException e) {
        LOG.log(Level.WARNING,
                "Unable to optimize lucene index: [" + catalogPath + "]: Message: " + e.getMessage());
    } finally {
        // Guard against a null writer (constructor may have thrown) instead
        // of relying on the catch-ignore to swallow an NPE.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignore) {
                // best-effort close
            }
        }
    }
}

From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java

License:Apache License

/**
 * Removes the index document for the given workflow instance, matching on
 * its {@code workflow_inst_id} term.
 *
 * @param inst the workflow instance whose index document should be deleted
 * @throws InstanceRepositoryException if the index cannot be updated
 */
private synchronized void removeWorkflowInstanceDocument(WorkflowInstance inst)
        throws InstanceRepositoryException {
    IndexReader reader = null;
    try {
        // The original opened the reader a second time inside the next try
        // block, leaking this first instance; open it exactly once.
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        // Log through the class logger instead of printing a stack trace.
        LOG.log(Level.WARNING, "Unable to open index reader: " + e.getMessage());
    }
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        IndexWriter writer = new IndexWriter(indexDir, config);
        try {
            LOG.log(Level.FINE, "LuceneWorkflowEngine: remove document from index for workflow instance: ["
                    + inst.getId() + "]");
            writer.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
        } finally {
            // Close the writer even when the delete fails, so the index
            // write lock is always released.
            writer.close();
        }
    } catch (IOException e) {
        LOG.log(Level.SEVERE, e.getMessage());
        LOG.log(Level.WARNING, "Exception removing workflow instance: [" + inst.getId()
                + "] from index: Message: " + e.getMessage());
        throw new InstanceRepositoryException(e.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
                // best-effort close
            }
        }
    }
}

From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java

License:Apache License

/**
 * Adds the given workflow instance to the Lucene catalog as a single
 * index document.
 *
 * @param wInst the workflow instance to index
 * @throws InstanceRepositoryException if indexing fails
 */
private synchronized void addWorkflowInstanceToCatalog(WorkflowInstance wInst)
        throws InstanceRepositoryException {
    IndexWriter writer = null;

    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        writer = new IndexWriter(indexDir, config);
        Document doc = toDoc(wInst);
        writer.addDocument(doc);
    } catch (IOException e) {
        LOG.log(Level.WARNING,
                "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage());
        throw new InstanceRepositoryException(
                "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage());
    } finally {
        // Guard against a null writer (constructor may have thrown); the
        // original NPE'd here and printed the exception to System.out.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                LOG.log(Level.WARNING, "Failed to close index writer: " + e.getLocalizedMessage());
            }
        }
    }

}

From source file:org.apache.solr.spelling.FileBasedSpellChecker.java

License:Apache License

/**
 * Loads the spell-check dictionary from the configured external file.
 * When a field type is configured, each line is analyzed into an in-memory
 * Lucene index and wrapped as a {@code HighFrequencyDictionary}; otherwise
 * the file is used directly as a {@code PlainTextDictionary}.
 *
 * @param core the owning Solr core (used for schema and resource loading)
 * @param searcher the current searcher, may be <code>null</code>
 */
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
    try {
        IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
        // Get the field's analyzer
        if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
            FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
            // Do index-time analysis using the given fieldType's analyzer
            RAMDirectory ramDir = new RAMDirectory();

            LogMergePolicy mp = new LogByteSizeMergePolicy();
            mp.setMergeFactor(300);

            IndexWriter writer = new IndexWriter(ramDir,
                    new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer())
                            .setMaxBufferedDocs(150).setMergePolicy(mp)
                            .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
            // TODO: if we enable this, codec gets angry since field won't exist in the schema
            // .setCodec(core.getCodec())
            );

            // Ensure the writer is closed even if indexing a line throws;
            // the original leaked it in that case.
            try {
                List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

                for (String s : lines) {
                    Document d = new Document();
                    d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
                    writer.addDocument(d);
                }
                writer.forceMerge(1);
            } finally {
                writer.close();
            }

            dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
        } else {
            // check if character encoding is defined
            if (characterEncoding == null) {
                dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
            } else {
                dictionary = new PlainTextDictionary(new InputStreamReader(
                        core.getResourceLoader().openResource(sourceLocation), characterEncoding));
            }
        }

    } catch (IOException e) {
        log.error("Unable to load spellings", e);
    }
}

From source file:org.apache.solr.update.SolrIndexConfig.java

License:Apache License

/**
 * Builds a MergePolicy, may also modify the value returned by
 * getUseCompoundFile() for use by the IndexWriterConfig if
 * "useCompoundFile" is specified as an init arg for
 * an out of the box MergePolicy that no longer supports it
 *
 * @see #fixUseCFMergePolicyInitArg
 * @see #getUseCompoundFile
 */
private MergePolicy buildMergePolicy(IndexSchema schema) {
    String className = (mergePolicyInfo == null) ? defaultMergePolicyClassName : mergePolicyInfo.className;

    MergePolicy mergePolicy = schema.getResourceLoader().newInstance(className, MergePolicy.class);

    if (mergePolicy instanceof LogMergePolicy) {
        LogMergePolicy lmp = (LogMergePolicy) mergePolicy;
        fixUseCFMergePolicyInitArg(LogMergePolicy.class);

        if (maxMergeDocs != -1) {
            lmp.setMaxMergeDocs(maxMergeDocs);
        }

        lmp.setNoCFSRatio(getUseCompoundFile() ? 1.0 : 0.0);

        if (mergeFactor != -1) {
            lmp.setMergeFactor(mergeFactor);
        }
    } else if (mergePolicy instanceof TieredMergePolicy) {
        TieredMergePolicy tmp = (TieredMergePolicy) mergePolicy;
        fixUseCFMergePolicyInitArg(TieredMergePolicy.class);

        tmp.setNoCFSRatio(getUseCompoundFile() ? 1.0 : 0.0);

        // TieredMergePolicy has no single merge factor; map it onto both
        // the max-merge-at-once and segments-per-tier knobs.
        if (mergeFactor != -1) {
            tmp.setMaxMergeAtOnce(mergeFactor);
            tmp.setSegmentsPerTier(mergeFactor);
        }
    } else if (mergeFactor != -1) {
        log.warn(
                "Use of <mergeFactor> cannot be configured if merge policy is not an instance of LogMergePolicy or TieredMergePolicy. The configured policy's defaults will be used.");
    }

    // Apply any remaining init args as bean setters on the policy.
    if (mergePolicyInfo != null) {
        SolrPluginUtils.invokeSetters(mergePolicy, mergePolicyInfo.initArgs);
    }

    return mergePolicy;
}

From source file:org.fracturedatlas.athena.apa.indexing.IndexingApaAdapter.java

License:Open Source License

/**
 * Initializes the index-writer configuration: a whitespace/lower-case
 * analyzer plus a document-count based log merge policy using the
 * configured {@code MERGE_FACTOR}.
 */
public void initializeIndex() {
    LogMergePolicy mergePolicy = new LogDocMergePolicy();
    mergePolicy.setMergeFactor(MERGE_FACTOR);

    analyzer = new WhitespaceLowerCaseAnalyzer();
    config = new IndexWriterConfig(Version.LUCENE_32, analyzer);
    config.setMergePolicy(mergePolicy);
}