Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:de.citec.lucene.CreateIndex.java

/**
 * Builds a new Lucene index from the hard-coded corpus file.
 *
 * <p>The analyzer is selected from the configured {@code Language}. The index is opened with
 * {@code OpenMode.CREATE}. An {@link IOException} raised while indexing a single file is logged
 * and the remaining files are still processed. After the writer has been closed the value of
 * {@code counter} is printed to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    }
    if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    }
    if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);

    // Both resources are released in reverse order (writer first, then directory), so the
    // directory handle is no longer leaked when indexing finishes or fails.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                // Best effort: a failing file must not abort the remaining ones.
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });
    }
    System.out.println(counter);
}

From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java

/**
 * Builds a new Lucene index from the hard-coded sentence file.
 *
 * <p>The analyzer is selected from the configured {@code Language} (German, Spanish, English or
 * Japanese). The index is opened with {@code OpenMode.CREATE}. An {@link IOException} raised
 * while indexing a single file is logged and the remaining files are still processed.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Downloads/german_sentences_reduced.txt");
    String indexPath = "/Users/swalter/Index/GermanIndexReduced/";
    Language language = Language.DE;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    }
    if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    }
    if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }
    if (language.equals(Language.JA)) {
        analyzer = new JapaneseAnalyzer();
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);

    // Both resources are released in reverse order (writer first, then directory), so the
    // directory handle is no longer leaked when indexing finishes or fails.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f), language);
            } catch (IOException ex) {
                // Best effort: a failing file must not abort the remaining ones.
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });
    }
}

From source file:de.csw.linkgenerator.plugin.lucene.IndexUpdater.java

License:Open Source License

/**
 * Opens the index writer on {@code indexDir} using the given open mode.
 *
 * <p>When a writer is already set, an error is logged and nothing else happens. An
 * {@link IOException} raised while opening the index is logged and not rethrown.
 *
 * @param openMode the open mode handed to the {@code IndexWriterConfig}
 */
private void openWriter(OpenMode openMode) {
    if (writer != null) {
        LOG.error("Writer already open and createWriter called");
        return;
    }

    try {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        writerConfig.setOpenMode(openMode);

        // Ralph: This is kind of guesswork
        LogDocMergePolicy compoundFilePolicy = new LogDocMergePolicy();
        compoundFilePolicy.setUseCompoundFile(true);
        writerConfig.setMergePolicy(compoundFilePolicy);

        writer = new IndexWriter(indexDir, writerConfig);
    } catch (IOException e) {
        LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e);
        return;
    }

    // Only reached when the writer was opened without an IOException.
    if (LOG.isDebugEnabled()) {
        LOG.debug("successfully opened index writer : " + indexDir);
    }
}

From source file:de.dfki.km.leech.lucene.ToLuceneContentHandler.java

License:Open Source License

/**
 * Returns the writer that should be used next, switching to a newly created temporary index
 * once the current writer holds at least {@code getSplitAndMergeIndex()} documents.
 *
 * <p>If {@code getSplitAndMergeIndex()} is zero or negative, the initial writer is always
 * returned. When a new temporary index is created, its absolute path is recorded in
 * {@code m_hsTmpLuceneWriterPaths2Merge}.
 *
 * @return the initial writer, the current writer, or a newly created temporary writer
 * @throws CorruptIndexException declared by the signature; propagated from the Lucene calls
 * @throws LockObtainFailedException declared by the signature; propagated from the Lucene calls
 * @throws IOException if the new temporary index cannot be opened
 */
synchronized protected IndexWriter getCurrentWriter()
        throws CorruptIndexException, LockObtainFailedException, IOException {

    if (getSplitAndMergeIndex() <= 0) {
        return m_initialLuceneWriter;
    }

    if (m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) {
        return m_luceneWriter;
    }

    Directory initialDirectory = m_initialLuceneWriter.getDirectory();

    final Path nextIndexDir;
    if (initialDirectory instanceof FSDirectory) {
        // The writer being replaced is remembered for closing, unless it is the initial one.
        if (m_luceneWriter != m_initialLuceneWriter) {
            m_llIndexWriter2Close.add(m_luceneWriter);
        }

        // Sibling path of the initial index: "<initial index path>_<n>".
        String initialIndexPath = ((FSDirectory) initialDirectory).getDirectory().toAbsolutePath().toString();
        int nextSuffix = m_hsTmpLuceneWriterPaths2Merge.size() + 1;
        nextIndexDir = Paths.get(initialIndexPath + "_" + nextSuffix);
    } else {
        // we need something temporary
        String tmpRoot = new File(System.getProperty("java.io.tmpdir")).getAbsolutePath();
        String uniqueName = UUID.randomUUID().toString().replaceAll("\\W", "_");
        nextIndexDir = Paths.get(tmpRoot + "/leechTmp/" + uniqueName);
    }

    String rolloverMessage = "Current index exceeds " + m_iSplitIndexDocumentCount
            + " documents. Will create another temporary one under " + nextIndexDir;
    Logger.getLogger(ToLuceneContentHandler.class.getName()).info(rolloverMessage);

    @SuppressWarnings("deprecation")
    IndexWriterConfig tmpWriterConfig = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer());
    tmpWriterConfig.setOpenMode(OpenMode.CREATE);

    m_luceneWriter = new IndexWriter(new SimpleFSDirectory(nextIndexDir), tmpWriterConfig);
    m_hsTmpLuceneWriterPaths2Merge.add(nextIndexDir.toAbsolutePath().toString());

    return m_luceneWriter;
}

From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java

License:Open Source License

/**
 * Creates all writer, reader, and searcher objects if necessary
 * /*  w  ww  .  j  av  a  2s .  com*/
 * @throws CorruptIndexException
 * @throws LockObtainFailedException
 * @throws IOException
 */
public void openLuceneStuff() throws CorruptIndexException, LockObtainFailedException, IOException {
    if (m_indexWriter == null) {
        IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        m_indexWriter = new IndexWriter(new SimpleFSDirectory(Paths.get(m_strHistoryPath)), config);
    }

    if (m_indexReader == null)
        m_indexReader = DirectoryReader.open(m_indexWriter, true, true);

    if (m_indexSearcher == null)
        m_indexSearcher = new IndexSearcher(m_indexReader);
}

From source file:de.elbe5.cms.search.SearchBean.java

License:Open Source License

/**
 * Opens an {@code IndexWriter} with a {@code StandardAnalyzer} on the index under
 * {@code indexPath}, after calling {@code ensureDirectory(indexPath)}.
 *
 * @param create {@code true} to open with {@code OpenMode.CREATE}, {@code false} to open with
 *     {@code OpenMode.CREATE_OR_APPEND}
 * @param indexPath file-system path of the index directory
 * @return the newly opened writer; the caller is responsible for closing it
 * @throws Exception if the directory cannot be prepared or the index cannot be opened
 */
protected IndexWriter openIndexWriter(boolean create, String indexPath) throws Exception {
    ensureDirectory(indexPath);
    Directory indexDirectory = FSDirectory.open(Paths.get(indexPath));

    IndexWriterConfig.OpenMode mode = create
            ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.CREATE_OR_APPEND;
    IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer()).setOpenMode(mode);

    return new IndexWriter(indexDirectory, writerConfig);
}

From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java

/**
 * Indexes the documents under {@code docsDir} into the index at {@code indexDir}.
 *
 * <p>The index is opened with {@code OpenMode.CREATE_OR_APPEND} and a {@code StandardAnalyzer}.
 * The writer and the directory are closed exactly once when indexing ends, whether it
 * succeeded or failed. Any {@link IOException}, including one raised while closing, is logged
 * as a warning and not rethrown.
 */
public void refreshIndex() {
    final Path docDir = Paths.get(docsDir);

    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());

    // Add new documents to an existing index:
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // Optional: for better indexing performance, if you
    // are indexing many documents, increase the RAM
    // buffer.  But if you do this, increase the max heap
    // size to the JVM (eg add -Xmx512m or -Xmx1g):
    //
    // iwc.setRAMBufferSizeMB(256.0);

    // try-with-resources closes the writer first and the directory second; this replaces the
    // former double close of the writer and closes the previously leaked directory.
    try (Directory dir = FSDirectory.open(Paths.get(indexDir));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);
    } catch (IOException e) {
        LOGGER.warn("Exception while indexing", e);
    }
}

From source file:de.jetsli.lumeo.LucPerfTest.java

License:Apache License

/**
 * Performance test: updates documents through an NRTManager and afterwards checks that the
 * number of searchable documents equals the number of successful updates.
 *
 * NOTE(review): the meaning of the two PerfRunner constructor arguments (1000000, 26f) is not
 * visible in this block - confirm against PerfRunner.
 */
public void testPerf() {
    new PerfRunner(1000000, 26f) {

        /**
         * Closes the resources of a previous run (if any), deletes the index directory and
         * sets up a fresh writer, NRTManager and reopen thread.
         */
        @Override
        public void reinit() throws Exception {
            super.reinit();

            // Tear down the previous run; nrtManager is used as the "was initialized" marker.
            if (nrtManager != null) {
                nrtManager.close();
                reopenThread.close();
                writer.waitForMerges();
                writer.close();
                dir.close();
            }
            Helper.deleteDir(file);
            docs = 0;
            IndexWriterConfig cfg = new IndexWriterConfig(version, keyAna);
            cfg.setRAMBufferSizeMB(128);

            //                cfg.setCodec(new Lucene40Codec() {
            //
            //                    @Override public PostingsFormat getPostingsFormatForField(String field) {
            //                        if ("_id".equals(field))
            //                            return new Pulsing40PostingsFormat();
            //                        else
            //                            return new Lucene40PostingsFormat();
            //                    }
            //                });
            // Log-byte-size merge policy with compound files switched off.
            LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
            mp.setUseCompoundFile(false);
            cfg.setMergePolicy(mp);
            dir = FSDirectory.open(file);
            // CREATE: every run starts on a new index.
            cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

            writer = new IndexWriter(dir, cfg);
            // The warmer is intentionally a no-op (see TODO below).
            nrtManager = new NRTManager(writer, new SearcherWarmer() {

                @Override
                public void warm(IndexSearcher s) throws IOException {
                    // TODO get some random vertices via getVertices?
                }
            });

            // Reopen thread runs as a daemon, two priority steps above the current thread
            // (capped at Thread.MAX_PRIORITY).
            int priority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY);
            reopenThread = new NRTManagerReopenThread(nrtManager, 5.0, 0.03);
            reopenThread.setName("NRT Reopen Thread");
            reopenThread.setPriority(priority);
            reopenThread.setDaemon(true);
            reopenThread.start();
        }

        // Reused buffer for the prefix-coded form of the document id.
        final BytesRef bytes = new BytesRef();

        /**
         * Builds a document with the fields "_id", "_uid" and "_type" for index {@code i} and
         * updates it in the index, keyed on the prefix-coded "_id" term. An IOException is
         * logged and the document is then not counted.
         */
        @Override
        public void innerRun(int trial, int i) {
            long id = i;
            Document newDoc = new Document();
            NumericField idField = new NumericField("_id", 6, NumericField.TYPE_STORED).setLongValue(id);
            Field uIdField = new Field("_uid", "" + id, StringField.TYPE_STORED);
            Field typeField = new Field("_type", "test", StringField.TYPE_STORED);

            newDoc.add(idField);
            newDoc.add(uIdField);
            newDoc.add(typeField);
            //                Analyzer ana = anas.get(newDoc.get("_type"));
            try {
                NumericUtils.longToPrefixCoded(id, 0, bytes);
                // Remember the generation of the latest update so finalAssert can wait for it.
                latestGen = nrtManager.updateDocument(new Term("_id", bytes), newDoc, keyAna);
                docs++;
            } catch (IOException ex) {
                logger.error("Cannot update " + i, ex);
            }
        }

        /**
         * Waits for the latest update generation, counts all documents with a match-all
         * query and throws IllegalStateException if the count differs from {@code docs}.
         */
        @Override
        protected void finalAssert() throws Exception {
            // logger.info("wait for " + latestGen + ", current:" + nrtManager.getCurrentSearchingGen(true));
            nrtManager.waitForGeneration(latestGen, true);
            //                writer.commit();
            //                writer.waitForMerges();
            SearcherManager mng = nrtManager.getSearcherManager(true);
            //                mng.maybeReopen();
            IndexSearcher searcher = mng.acquire();
            try {
                TotalHitCountCollector coll = new TotalHitCountCollector();
                searcher.search(new MatchAllDocsQuery(), coll);
                long total = coll.getTotalHits();
                if (docs != total)
                    throw new IllegalStateException(total + " vs. " + docs);
            } finally {
                // The acquired searcher is released even when the check above throws.
                nrtManager.getSearcherManager(true).release(searcher);
            }
        }
    }.run();
}

From source file:de.jetsli.lumeo.RawLucene.java

License:Apache License

/**
 * Initializes this instance: opens the index writer, creates the NRT manager and starts the
 * flush and reopen threads. The whole method runs between {@code indexLock()} and
 * {@code indexUnlock()}.
 *
 * @return this instance
 * @throws RuntimeException wrapping any exception raised during initialization, including the
 *     {@code IllegalStateException} raised when the instance is already closed or initialized
 */
public RawLucene init() {
    indexLock();
    try {
        if (closed) {
            throw new IllegalStateException("Already closed");
        }

        if (writer != null) {
            throw new IllegalStateException("Already initialized");
        }

        // release locks when started
        if (IndexWriter.isLocked(dir)) {
            logger.warn("index is locked + " + name + " -> releasing lock");
            IndexWriter.unlock(dir);
        }

        IndexWriterConfig writerConfig = new IndexWriterConfig(VERSION, defaultMapping.getCombinedAnalyzer());
        LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        mergePolicy.setMaxMergeMB(getMaxMergeMB());
        writerConfig.setRAMBufferSizeMB(ramBufferSizeMB);
        writerConfig.setTermIndexInterval(termIndexIntervalSize);
        writerConfig.setMergePolicy(mergePolicy);

        // TODO specify different formats for id fields etc
        // -> this breaks 16 of our tests!? Lucene Bug?
        //            cfg.setCodec(new Lucene40Codec() {
        //
        //                @Override public PostingsFormat getPostingsFormatForField(String field) {
        //                    return new Pulsing40PostingsFormat();
        //                }
        //            });

        // cfg.setMaxThreadStates(8);

        // Append to an index that already exists in the directory, otherwise create one.
        final IndexWriterConfig.OpenMode openMode;
        if (DirectoryReader.indexExists(dir)) {
            openMode = IndexWriterConfig.OpenMode.APPEND;
        } else {
            openMode = IndexWriterConfig.OpenMode.CREATE;
        }
        writerConfig.setOpenMode(openMode);

        // wrap the writer with a tracking index writer
        IndexWriter rawWriter = new IndexWriter(dir, writerConfig);
        writer = new TrackingIndexWriter(rawWriter);

        nrtManager = new NRTManager(writer, new SearcherFactory() {
            //              @Override
            //              public IndexSearcher newSearcher(IndexReader reader) throws IOException {
            //                //TODO do some kind of warming here?
            //                return new IndexSearcher(reader);
            //              }
        });

        getCurrentRTCache(latestGen);

        // Both background threads are daemons, two priority steps above the current thread
        // (capped at Thread.MAX_PRIORITY).
        int backgroundPriority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY);

        flushThread = new FlushThread("flush-thread");
        flushThread.setPriority(backgroundPriority);
        flushThread.setDaemon(true);
        flushThread.start();

        reopenThread = new NRTManagerReopenThread(nrtManager, ordinaryWaiting, incomingSearchesMaximumWaiting);
        reopenThread.setName("NRT Reopen Thread");
        reopenThread.setPriority(backgroundPriority);
        reopenThread.setDaemon(true);
        reopenThread.start();

        return this;
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        indexUnlock();
    }
}

From source file:de.ks.flatadocdb.index.LuceneIndex.java

License:Apache License

/**
 * Creates the index on top of the repository's Lucene folder.
 *
 * <p>The folder {@code LUCENE_INDEX_FOLDER} below the repository path is created if needed.
 * The index is then opened with a {@code StandardAnalyzer} in
 * {@code OpenMode.CREATE_OR_APPEND} and the index reader is (re)opened. The time spent
 * opening is measured with a {@code TimeProfiler} and logged at debug level.
 *
 * @param repository the repository whose path holds the index folder
 * @param metaModel passed on to the superclass constructor
 * @param executorService passed on to the superclass constructor
 * @throws RuntimeException wrapping any {@link IOException} raised while creating the folder
 *     or opening the index
 */
public LuceneIndex(Repository repository, MetaModel metaModel, ExecutorService executorService)
        throws RuntimeException {
    super(repository, metaModel, executorService);
    try {
        Path indexFolder = repository.getPath().resolve(LUCENE_INDEX_FOLDER);
        Files.createDirectories(indexFolder);

        TimeProfiler loadTimer = new TimeProfiler("Lucene loading").start();
        try {
            this.directory = FSDirectory.open(indexFolder);
            analyzer = new StandardAnalyzer();

            IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
            writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

            indexWriter = new IndexWriter(directory, writerConfig);
            reopenIndexReader();
        } finally {
            // The timing is logged whether or not opening the index succeeded.
            loadTimer.stop().logDebug(log);
        }
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}