Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig#setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:org.apache.nutch.indexwriter.lucene.LuceneWriter.java

License:Apache License

/**
 * Opens a Lucene index writer for this job. The index is written to a
 * temporary local path (promoted to the permanent output location later),
 * after any stale permanent index from a previous run is deleted.
 *
 * @param job  Hadoop job configuration supplying the indexer settings
 * @param name name of the index under the job's output path
 * @throws IOException if the filesystem or the index cannot be opened
 */
public void open(JobConf job, String name) throws IOException {
    this.fs = FileSystem.get(job);
    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt()));

    // Delete any old permanent index, if present.
    fs.delete(perm, true);

    analyzerFactory = new AnalyzerFactory(job);

    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_10_2,
            new SmartChineseAnalyzer());

    // Merge policy tuned from job configuration.
    LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy();
    policy.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    policy.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE));
    writerConfig.setMergePolicy(policy);

    writerConfig.setUseCompoundFile(false);
    writerConfig.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
    writerConfig.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    writer = new org.apache.lucene.index.IndexWriter(
            FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), writerConfig);

    processOptions(job);
}

From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java

License:Apache License

/**
 * Removes the Lucene document for the given product (keyed by its
 * {@code product_id} term) from the catalog index.
 *
 * @param product the product whose index document should be deleted
 * @throws CatalogException if the deletion fails with an I/O error
 */
private synchronized void removeProductDocument(Product product) throws CatalogException {

    try {
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        // Fixed: was e.printStackTrace() — route the failure through the
        // catalog logger so it is visible in the application log, with context.
        LOG.log(Level.WARNING, "Unable to open index reader for product: [" + product.getProductId()
                + "]: Message: " + e.getMessage(), e);
    }
    try {
        LOG.log(Level.FINE,
                "LuceneCatalog: remove document from index for product: [" + product.getProductId() + "]");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        IndexWriter writer = new IndexWriter(indexDir, config);
        writer.deleteDocuments(new Term("product_id", product.getProductId()));
        writer.close();

    } catch (IOException e) {
        LOG.log(Level.WARNING, "Exception removing product: [" + product.getProductName()
                + "] from index: Message: " + e.getMessage());
        throw new CatalogException(e.getMessage(), e);
    } finally {
        // Best-effort close of the reader; nothing useful can be done if it fails.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
            }
        }
    }
}

From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java

License:Apache License

/**
 * Adds the given complete product (product plus its metadata) to the
 * catalog index as a single Lucene document.
 *
 * @param cp the complete product to index
 * @throws CatalogException if the document cannot be added to the index
 */
private synchronized void addCompleteProductToIndex(CompleteProduct cp) throws CatalogException {
    IndexWriter writer = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        writer = new IndexWriter(indexDir, config);

        Document doc = toDoc(cp.getProduct(), cp.getMetadata());
        writer.addDocument(doc);
        // TODO: determine a better way to optimize the index
    } catch (Exception e) {
        LOG.log(Level.WARNING, "Unable to index product: [" + cp.getProduct().getProductName() + "]: Message: "
                + e.getMessage(), e);
        throw new CatalogException("Unable to index product: [" + cp.getProduct().getProductName()
                + "]: Message: " + e.getMessage(), e);
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            // Fixed: was System.out.println — close failures now reach the
            // application log instead of stdout.
            LOG.log(Level.WARNING, "Unable to close index writer: Message: " + e.getLocalizedMessage(), e);
        }
    }

}

From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java

License:Apache License

/**
 * Removes the Lucene document for the given workflow instance (keyed by its
 * {@code workflow_inst_id} term) from the instance repository index.
 *
 * @param inst the workflow instance whose index document should be deleted
 * @throws InstanceRepositoryException if the deletion fails with an I/O error
 */
private synchronized void removeWorkflowInstanceDocument(WorkflowInstance inst)
        throws InstanceRepositoryException {
    IndexReader reader = null;
    try {
        // Fixed: the reader was previously opened twice in a row, leaking the
        // first DirectoryReader instance. Open it exactly once, inside the
        // main try so an open failure is reported and rethrown below.
        reader = DirectoryReader.open(indexDir);
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        IndexWriter writer = new IndexWriter(indexDir, config);
        LOG.log(Level.FINE, "LuceneWorkflowEngine: remove document from index for workflow instance: ["
                + inst.getId() + "]");
        writer.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
        writer.close();
    } catch (IOException e) {
        LOG.log(Level.SEVERE, e.getMessage());
        LOG.log(Level.WARNING, "Exception removing workflow instance: [" + inst.getId()
                + "] from index: Message: " + e.getMessage());
        throw new InstanceRepositoryException(e.getMessage());
    } finally {
        // Best-effort close of the reader; nothing useful can be done if it fails.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
            }
        }
    }
}

From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java

License:Apache License

/**
 * Adds the given workflow instance to the instance repository index as a
 * single Lucene document.
 *
 * @param wInst the workflow instance to index
 * @throws InstanceRepositoryException if the instance cannot be indexed
 */
private synchronized void addWorkflowInstanceToCatalog(WorkflowInstance wInst)
        throws InstanceRepositoryException {
    IndexWriter writer = null;

    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        writer = new IndexWriter(indexDir, config);
        Document doc = toDoc(wInst);
        writer.addDocument(doc);
    } catch (IOException e) {
        LOG.log(Level.WARNING,
                "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage());
        throw new InstanceRepositoryException(
                "Unable to index workflow instance: [" + wInst.getId() + "]: Message: " + e.getMessage());
    } finally {
        // Fixed: the unconditional writer.close() raised an NPE (silently
        // swallowed via System.out) whenever the IndexWriter constructor threw.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                LOG.log(Level.WARNING, "Unable to close index writer: Message: " + e.getMessage(), e);
            }
        }
    }

}

From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepositoryFactory.java

License:Apache License

/**
 * Creates a Lucene-backed workflow instance repository, first ensuring the
 * underlying index exists (CREATE_OR_APPEND creates it when absent).
 *
 * @return a new {@link LuceneWorkflowInstanceRepository} over the configured index path
 */
public WorkflowInstanceRepository createInstanceRepository() {
    Directory indexDir = null;
    try {
        indexDir = FSDirectory.open(new File(indexFilePath).toPath());
    } catch (IOException e) {
        // Fixed: was e.printStackTrace() — log with the failing path for context.
        LOG.severe("Unable to open index directory: [" + indexFilePath + "]: Message: " + e.getMessage());
    }
    // Create the index if it does not already exist. Guarded on indexDir so a
    // failed directory open no longer triggers a pointless NullPointerException
    // inside the IndexWriter constructor.
    if (indexDir != null) {
        IndexWriter writer = null;
        try {
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            LogMergePolicy lmp = new LogDocMergePolicy();
            config.setMergePolicy(lmp);

            writer = new IndexWriter(indexDir, config);
        } catch (Exception e) {
            LOG.severe("Unable to create index: " + e.getMessage());
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    LOG.severe("Unable to close index: " + e.getMessage());
                }
            }
        }
    }

    return new LuceneWorkflowInstanceRepository(indexFilePath, pageSize);
}

From source file:org.apache.pdfbox.examples.lucene.IndexPDFFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 * /*www  .  jav  a  2s .  c  om*/
 * @param args command line arguments
 * 
 */
/**
 * Indexes all PDF documents under a directory into a Lucene index.
 *
 * <p>Usage: {@code -index INDEX_PATH} (defaults to "index"),
 * {@code -docs DOCS_PATH} (required), {@code -update} to append to an
 * existing index instead of recreating it.
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
    // Fixed: the concatenated usage text previously read "Lucene indexin
    // INDEX_PATH" because of a missing space between the two fragments.
    String usage = "java org.apache.pdfbox.lucene.IndexPDFFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes all PDF documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        // Bounds checks fixed: "-index" or "-docs" as the final argument
        // previously threw ArrayIndexOutOfBoundsException.
        if ("-index".equals(args[i]) && i + 1 < args.length) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i]) && i + 1 < args.length) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:org.apache.solr.codecs.test.testDeleteDocs.java

License:Apache License

/**
 * Deletes all documents matching the query {@code title:fourth} from the
 * ONSQL-codec-backed test index, then re-runs a search to verify the delete.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));

        // ----------- index configuration -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
        // Open (or create) the index; existing documents are preserved.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        //config.setCodec(new SimpleTextCodec());

        Properties props = new Properties();
        // Fixed: try-with-resources — the stream previously leaked if
        // props.load() threw before the explicit close().
        try (FileInputStream fstream = new FileInputStream(
                "C:\\work\\search_engine\\codec\\solr410\\solr_codectest\\collection1\\conf\\kvstore.properties")) {
            props.load(fstream);
        }
        ONSQLKVstoreHandler.getInstance().setKVStore("omega", props);
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter writer = new IndexWriter(luceneDir, config);
        QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_0, "title", analyzer);
        String search_word = "fourth";
        Query query = queryParser.parse(search_word);
        writer.deleteDocuments(query);
        writer.commit();
        writer.close();
        searchIndex("title", search_word);
    } catch (Throwable te) {
        te.printStackTrace();
    }
}

From source file:org.apache.solr.codecs.test.testMergeSegments.java

License:Apache License

public static void main(String[] args) {
    try {/*from   ww w .j a v a2s .  c o m*/
        testUtil.initPropsONSQL();
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.forceMerge(1);
        writer.close();

    } catch (Throwable te) {
        te.printStackTrace();
    }
}

From source file:org.apache.solr.codecs.test.testONSQLCodec.java

License:Apache License

public static void main(String[] args) {
    try {/*w ww .ja  va  2s .  com*/
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));
        testUtil.initPropsONSQL();
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        //config.setCodec(new SimpleTextCodec());            
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new IntField("intval", 111111, Store.YES), new LongField("longval", 1111111111L, Store.YES)));

        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new IntField("intval", 222222, Store.YES), new LongField("longval", 222222222L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new IntField("intval", 333333, Store.YES), new LongField("longval", 3333333333L, Store.YES)));
        writer.commit();
        writer.close();
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() < 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}