Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

This page collects example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode)

Source Link

Document

Specifies the OpenMode of the index.

Usage

From source file:edu.ur.ir.index.DefaultPowerPointTextExtractorTest.java

License:Apache License

/**
 * Test basic search within a plain text document
 * /*w w w.  ja  v a2s  . c om*/
 * @param description
 * @throws Exception 
 */
public void testIndexPowerPointDocument() throws Exception {

    RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper();
    Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database",
            "my_repository", properties.getProperty("a_repo_path"), "default_folder");

    // create the first file to store in the temporary folder
    String tempDirectory = properties.getProperty("ir_core_temp_directory");
    File directory = new File(tempDirectory);

    // helper to create the file
    FileUtil testUtil = new FileUtil();
    testUtil.createDirectory(directory);

    String baseLocation = properties.getProperty("ir_core_location");

    String powerPointFile = properties.getProperty("power_point_file");
    File f1 = new File(baseLocation + powerPointFile);

    assert f1 != null : "File should not be null";
    assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath();

    FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_power_point_file");
    info.setExtension("ppt");

    FileTextExtractor documentCreator = new DefaultPowerPointTextExtractor();
    assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension "
            + info.getExtension();

    String text = documentCreator.getText(new File(info.getFullPath()));

    Document doc = new Document();
    doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED));
    assert doc != null : "Document should be created";

    // create the lucene directory in memory
    Directory dir;
    try {
        dir = new RAMDirectory();
    } catch (Exception e1) {
        throw new RuntimeException(e1);
    }

    // store the document
    IndexWriter writer = null;
    try {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35,
                new StandardWithACIIFoldingFilter());
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        writer = new IndexWriter(dir, indexWriterConfig);
        writer.addDocument(doc);
        writer.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                // do nothing
            }
        }
    }

    // search the document
    try {
        int hits = executeQuery("body", "irFile", dir);
        assert hits == 1 : "Hit count should equal 1 but equals " + hits;

        hits = executeQuery("body", "hello", dir);

        assert hits == 1 : "Hit count should equal 1 but equals " + hits;

    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    repoHelper.cleanUpRepository();

}

From source file:edu.ur.ir.index.DefaultPowerPointXmlTextExtractorTest.java

License:Apache License

/**
 * Test basic search within text extracted from a PowerPoint XML (pptx) document.
 *
 * The file is added to a test repository, its text is extracted and indexed
 * into an in-memory Lucene index, and the index is then queried.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
public void testIndexPowerPointDocument() throws Exception {

    RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper();
    Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database",
            "my_repository", properties.getProperty("a_repo_path"), "default_folder");

    try {
        // temporary directory configured for the tests
        String tempDirectory = properties.getProperty("ir_core_temp_directory");
        File directory = new File(tempDirectory);

        // helper used to create it
        FileUtil testUtil = new FileUtil();
        testUtil.createDirectory(directory);

        String baseLocation = properties.getProperty("ir_core_location");
        String powerPointXmlFile = properties.getProperty("power_point_xml_file");
        File f1 = new File(baseLocation + powerPointXmlFile);

        assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath();

        FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_power_point_file");
        info.setExtension("pptx");

        FileTextExtractor documentCreator = new DefaultPowerPointXmlTextExtractor();
        assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension "
                + info.getExtension();

        String text = documentCreator.getText(new File(info.getFullPath()));

        Document doc = new Document();
        doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED));

        // create the lucene directory in memory
        Directory dir = new RAMDirectory();

        // store the document
        IndexWriter writer = null;
        try {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardWithACIIFoldingFilter());
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            writer = new IndexWriter(dir, indexWriterConfig);
            writer.addDocument(doc);
            writer.close();
            // closed cleanly: clear the reference so the finally block does not close it again
            writer = null;
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best effort: the failure from the try block is already propagating
                }
            }
        }

        // search the document
        try {
            int hits = executeQuery("body", "irFile", dir);
            assert hits == 1 : "Hit count should equal 1 but equals " + hits;

            hits = executeQuery("body", "hello", dir);
            assert hits == 1 : "Hit count should equal 1 but equals " + hits;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } finally {
        // always remove the test repository, even when an assertion above fails
        repoHelper.cleanUpRepository();
    }

}

From source file:edu.ur.ir.index.DefaultRtfTextExtractorTest.java

License:Apache License

/**
 * Test basic search within a plain text document
 * /* ww w  . j  av  a  2s .c  om*/
 * @param description
 * @throws Exception 
 */
public void testIndexRtfDocument() throws Exception {

    RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper();
    Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database",
            "my_repository", properties.getProperty("a_repo_path"), "default_folder");

    // create the first file to store in the temporary folder
    String tempDirectory = properties.getProperty("ir_core_temp_directory");
    File directory = new File(tempDirectory);

    // helper to create the file
    FileUtil testUtil = new FileUtil();
    testUtil.createDirectory(directory);

    String baseLocation = properties.getProperty("ir_core_location");
    String rtfFile = properties.getProperty("rtf_file");
    File f1 = new File(baseLocation + rtfFile);

    assert f1 != null : "File should not be null";
    assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath();

    FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_doc_file");
    info.setExtension("rtf");

    FileTextExtractor documentCreator = new DefaultRtfTextExtractor();
    assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension "
            + info.getExtension();

    String text = documentCreator.getText(new File(info.getFullPath()));

    Document doc = new Document();
    doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED));
    assert doc != null : "Document should be created";

    // create the lucene directory in memory
    Directory dir;
    try {
        dir = new RAMDirectory();
    } catch (Exception e1) {
        throw new RuntimeException(e1);
    }

    // store the document
    IndexWriter writer = null;
    try {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35,
                new StandardWithACIIFoldingFilter());
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        writer = new IndexWriter(dir, indexWriterConfig);
        writer.addDocument(doc);
        writer.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                // do nothing
            }
        }
    }

    // search the document
    try {
        int hits = executeQuery("body", "irFile", dir);
        assert hits == 1 : "Hit count should equal 1 but equals " + hits;

        hits = executeQuery("body", "hello", dir);

        assert hits == 1 : "Hit count should equal 1 but equals " + hits;

    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    repoHelper.cleanUpRepository();

}

From source file:edu.ur.ir.index.DefaultWordTextExtractorTest.java

License:Apache License

/**
 * Test basic search within text extracted from a Word (doc) document.
 *
 * The file is added to a test repository, its text is extracted and indexed
 * into an in-memory Lucene index, and the index is then queried.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
public void testIndexWordDocument() throws Exception {

    RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper();
    Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database",
            "my_repository", properties.getProperty("a_repo_path"), "default_folder");

    try {
        // temporary directory configured for the tests
        String tempDirectory = properties.getProperty("ir_core_temp_directory");
        File directory = new File(tempDirectory);

        // helper used to create it
        FileUtil testUtil = new FileUtil();
        testUtil.createDirectory(directory);

        String baseLocation = properties.getProperty("ir_core_location");
        String wordFile = properties.getProperty("word_file");
        File f1 = new File(baseLocation + wordFile);

        assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath();

        FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_doc_file");
        info.setExtension("doc");

        FileTextExtractor documentCreator = new DefaultWordTextExtractor();
        assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension "
                + info.getExtension();

        String text = documentCreator.getText(new File(info.getFullPath()));

        Document doc = new Document();
        doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED));

        // create the lucene directory in memory
        Directory dir = new RAMDirectory();

        // store the document
        IndexWriter writer = null;
        try {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardWithACIIFoldingFilter());
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            writer = new IndexWriter(dir, indexWriterConfig);
            writer.addDocument(doc);
            writer.close();
            // closed cleanly: clear the reference so the finally block does not close it again
            writer = null;
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best effort: the failure from the try block is already propagating
                }
            }
        }

        // search the document
        try {
            int hits = executeQuery("body", "irFile", dir);
            assert hits == 1 : "Hit count should equal 1 but equals " + hits;

            hits = executeQuery("body", "hello", dir);
            assert hits == 1 : "Hit count should equal 1 but equals " + hits;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } finally {
        // always remove the test repository, even when an assertion above fails
        repoHelper.cleanUpRepository();
    }

}

From source file:edu.ur.ir.index.DefaultWordXmlTextExtractorTest.java

License:Apache License

/**
 * Test basic search within text extracted from a Word XML (docx) document.
 *
 * The file is added to a test repository, its text is extracted and indexed
 * into an in-memory Lucene index, and the index is then queried.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
public void testIndexWordXmlDocument() throws Exception {

    RepositoryBasedTestHelper repoHelper = new RepositoryBasedTestHelper();
    Repository repo = repoHelper.createRepository("localFileServer", "displayName", "file_database",
            "my_repository", properties.getProperty("a_repo_path"), "default_folder");

    try {
        // temporary directory configured for the tests
        String tempDirectory = properties.getProperty("ir_core_temp_directory");
        File directory = new File(tempDirectory);

        // helper used to create it
        FileUtil testUtil = new FileUtil();
        testUtil.createDirectory(directory);

        String baseLocation = properties.getProperty("ir_core_location");
        String wordXmlFile = properties.getProperty("word_xml_file");
        File f1 = new File(baseLocation + wordXmlFile);

        assert f1.canRead() : "Should be able to read the file " + f1.getAbsolutePath();

        FileInfo info = repo.getFileDatabase().addFile(f1, "indexed_docx_file");
        info.setExtension("docx");

        FileTextExtractor documentCreator = new DefaultWordXmlTextExtractor();
        assert documentCreator.canExtractText(info.getExtension()) : "Cannot create document for extension "
                + info.getExtension();

        String text = documentCreator.getText(new File(info.getFullPath()));

        Document doc = new Document();
        doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED));

        // create the lucene directory in memory
        Directory dir = new RAMDirectory();

        // store the document
        IndexWriter writer = null;
        try {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardWithACIIFoldingFilter());
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            writer = new IndexWriter(dir, indexWriterConfig);
            writer.addDocument(doc);
            writer.close();
            // closed cleanly: clear the reference so the finally block does not close it again
            writer = null;
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // best effort: the failure from the try block is already propagating
                }
            }
        }

        // search the document
        try {
            int hits = executeQuery("body", "irFile", dir);
            assert hits == 1 : "Hit count should equal 1 but equals " + hits;

            hits = executeQuery("body", "hello", dir);
            assert hits == 1 : "Hit count should equal 1 but equals " + hits;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } finally {
        // always remove the test repository, even when an assertion above fails
        repoHelper.cleanUpRepository();
    }

}

From source file:edu.ur.ir.user.service.DefaultUserGroupIndexService.java

License:Apache License

/**
 * Adds the specified user groups to the index in the given folder.  This
 * can be used to re-index all user groups.
 *
 * Failures while writing are logged and reported through the error email
 * service rather than thrown to the caller.
 *
 * @param userGroups - user groups to index
 * @param userGroupIndexFolder - folder location of the index
 * @param overwriteExistingIndex - if set to true, the writer is opened in
 *        CREATE mode; otherwise the writer configuration's default open mode is used
 */
public void add(List<IrUserGroup> userGroups, File userGroupIndexFolder, boolean overwriteExistingIndex) {
    // build every document up front, before the index is touched
    LinkedList<Document> groupDocuments = new LinkedList<Document>();
    for (IrUserGroup group : userGroups) {
        log.debug("Adding user group " + group);
        groupDocuments.add(getDocument(group));
    }

    Directory indexDirectory = null;
    IndexWriter indexWriter = null;
    try {
        indexDirectory = FSDirectory.open(userGroupIndexFolder);

        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
        if (overwriteExistingIndex) {
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        }
        indexWriter = new IndexWriter(indexDirectory, config);

        for (Document groupDocument : groupDocuments) {
            indexWriter.addDocument(groupDocument);
        }
        indexWriter.commit();
    } catch (Exception e) {
        log.error(e);
        errorEmailService.sendError(e);
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (Exception closeFailure) {
                log.error(closeFailure);
                // the close failed, so release the index lock if it is still held
                try {
                    if (IndexWriter.isLocked(indexDirectory)) {
                        IndexWriter.unlock(indexDirectory);
                    }
                } catch (IOException unlockFailure) {
                    log.error(unlockFailure);
                }
            }
        }
        if (indexDirectory != null) {
            try {
                indexDirectory.close();
            } catch (Exception closeFailure) {
                log.error(closeFailure);
            }
        }
    }
}

From source file:edu.utsa.sifter.Indexer.java

License:Apache License

/**
 * Opens the index directory at {@code path} and returns a writer on it,
 * configured with {@code OpenMode.CREATE_OR_APPEND}.
 *
 * The directory is closed again if anything fails before the writer has
 * been created, so a failed call does not leak the directory handle.
 *
 * @param path      file system location of the index
 * @param stopwords passed to {@code getStopList} to build the analyzer's stop list
 * @param conf      supplies the RAM buffer size ({@code INDEXING_BUFFER_SIZE}, in MB)
 *                  and the maximum number of thread states ({@code THREAD_POOL_SIZE})
 * @return the newly created index writer
 * @throws IOException if the directory or the writer cannot be opened
 */
static IndexWriter getIndexWriter(final String path, final String stopwords, final SifterConfig conf)
        throws IOException {
    final Directory dir = FSDirectory.open(new File(path));
    boolean writerCreated = false;
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44, getStopList(stopwords));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        iwc.setMaxThreadStates(conf.THREAD_POOL_SIZE);
        IndexWriter writer = new IndexWriter(dir, iwc);
        writerCreated = true;
        return writer;
    } finally {
        if (!writerCreated) {
            // no writer was handed out, so nobody else can close the directory
            try {
                dir.close();
            } catch (IOException ignored) {
                // best effort: the original failure is already propagating
            }
        }
    }
}

From source file:edu.utsa.sifter.som.MainSOM.java

License:Apache License

/**
 * Opens the index directory {@code somIdx} and returns a writer on it,
 * configured with {@code OpenMode.CREATE_OR_APPEND}.
 *
 * The directory is closed again if anything fails before the writer has
 * been created, so a failed call does not leak the directory handle.
 *
 * @param somIdx file system location of the index
 * @param conf   supplies the RAM buffer size ({@code INDEXING_BUFFER_SIZE}, in MB)
 * @return the newly created index writer
 * @throws CorruptIndexException declared by the original signature
 * @throws IOException if the directory or the writer cannot be opened
 */
IndexWriter createWriter(final File somIdx, final SifterConfig conf) throws CorruptIndexException, IOException {
    final Directory dir = FSDirectory.open(somIdx);
    boolean writerCreated = false;
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        IndexWriter writer = new IndexWriter(dir, iwc);
        writerCreated = true;
        return writer;
    } finally {
        if (!writerCreated) {
            // no writer was handed out, so nobody else can close the directory
            try {
                dir.close();
            } catch (IOException ignored) {
                // best effort: the original failure is already propagating
            }
        }
    }
}

From source file:edu.virginia.cs.index.AnswerIndexer.java

/**
 * Creates the initial index files on disk
 *
 * @param indexPath/*from   w  ww. ja  va  2 s . com*/
 * @return
 * @throws IOException
 */
private static IndexWriter setupIndex(String indexPath) throws IOException {
    Analyzer analyzer = new SpecialAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setRAMBufferSizeMB(2048.0);

    FSDirectory dir = FSDirectory.open(new File(indexPath));
    IndexWriter writer = new IndexWriter(dir, config);

    return writer;
}

From source file:edu.virginia.cs.index.PostLinkIndexer.java

/**
 * Creates the initial index files on disk.
 *
 * The writer is opened with {@code OpenMode.CREATE}. If the writer cannot be
 * created, the directory opened for it is closed again before the failure
 * is propagated.
 *
 * @param indexPath file system location of the index
 * @return a writer on the index at {@code indexPath}
 * @throws IOException if the directory or the writer cannot be opened
 */
private static IndexWriter setupIndex(String indexPath) throws IOException {
    Analyzer analyzer = new SpecialAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setRAMBufferSizeMB(2048.0);

    FSDirectory dir = FSDirectory.open(new File(indexPath));
    boolean writerCreated = false;
    try {
        IndexWriter writer = new IndexWriter(dir, config);
        writerCreated = true;
        return writer;
    } finally {
        if (!writerCreated) {
            // no writer was handed out, so nobody else can close the directory
            try {
                dir.close();
            } catch (Exception ignored) {
                // best effort: the original failure is already propagating
            }
        }
    }
}