Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

This page collects example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode)

Source Link

Document

Specifies the OpenMode of the index.

Usage

From source file:kbp2013.index.IndexWikipediaCorpus_v2.java

License:Open Source License

/**
 * Indexes a bzip2-compressed Wikipedia XML dump into the Lucene index at
 * {@code wikiluceneIndex}.
 *
 * <p>The dump named by {@code wikidump} is read line by line. Lines are accumulated until a
 * line containing {@code </page>} is seen; the accumulated page is then passed to
 * {@code indexDocument} together with its lower-cased title, unless its {@code <ns>} value
 * is not 0 or its text starts with {@code #REDIRECT}.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the dump cannot be read or the index cannot be written
 * @throws Exception propagated from the helpers this method calls
 */
public static void main(String[] args) throws IOException, Exception {

    initializeFromDefault();

    System.out.println("Indexing Wikipedia Dump to directory '" + wikiluceneIndex + "'...");

    INDEX_DIR = new File(wikiluceneIndex);
    if (INDEX_DIR.exists() && create == 1) {
        System.out.println("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.exit(1);
    }

    if (!wikidump.endsWith(".bzip2")) {
        System.out.println("NOTICE: The Wikipedia dump must be in bzip2 format.");
        System.exit(0);
    }

    Directory dir = FSDirectory.open(new File(wikiluceneIndex));

    // Open and configure Lucene
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    iwc.setMaxThreadStates(100);

    if (create == 0) {
        // add new documents to an existing index
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // when appending, optionally verify the existing index first
        if (checkindex == 1) {
            System.out.println("Checking index ...");
            CheckIndex ci = new CheckIndex(dir);
            ci.checkIndex();
            System.out.println("End of Checking index");
        }
    } else {
        iwc.setOpenMode(OpenMode.CREATE);
    }

    IndexWriter writer = new IndexWriter(dir, iwc);

    int docCount = 0; // number of documents that have been indexed
    Date start = new Date(); // time at which the indexing pass starts

    // The writer must be closed even when reading or indexing fails: an unclosed writer
    // leaves the index unusable and it would have to be regenerated.
    try {
        InputStream fileInputStream = new BufferedInputStream(new FileInputStream(wikidump));
        BufferedReader reader = null;
        try {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            reader = new BufferedReader(
                    new InputStreamReader(new BZip2InputStream(fileInputStream, false), "UTF-8"));

            // accumulates the lines of the page currently being read
            StringBuilder pageBuffer = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains("</page>")) {
                    if (pageBuffer.length() > 0) {
                        // materialise the buffer once instead of on every lookup
                        String page = pageBuffer.toString();
                        String docTitle = page.substring(page.indexOf("<title>") + 7,
                                page.indexOf("</title>"));
                        String content = page.substring(page.indexOf("<page>")) + "</page>";

                        // only namespace 0 pages that are not redirects are indexed
                        int namespaceValue = Integer.parseInt(
                                content.substring(content.indexOf("<ns>") + 4, content.indexOf("</ns>")));
                        boolean redirect = content.indexOf("<text xml:space=\"preserve\">#REDIRECT") != -1;
                        if (namespaceValue != 0 || redirect) {
                            pageBuffer.setLength(0);
                            continue;
                        }

                        indexDocument(writer, content, docTitle.toLowerCase());
                        // count first so the message reports the documents actually processed
                        docCount++;
                        System.err.println("Processed " + docCount + " documents");
                    }
                    pageBuffer.setLength(0);
                }
                pageBuffer.append(line);
            }
        } finally {
            // closing the reader closes the whole stream chain; fall back to the raw stream
            // when the chain could not be built
            if (reader != null) {
                reader.close();
            } else {
                fileInputStream.close();
            }
        }
    } finally {
        writer.close();
    }

    Date end = new Date();
    System.err.println(end.getTime() - start.getTime() + " total milliseconds");

}

From source file:kpl.db.FullTextTrigger.java

/**
 * Get the Lucene index access.
 *
 * Initializes the shared index state (index path, directory, writer, reader and
 * searcher) if the index path or the index writer has not been set yet.
 *
 * @param   conn                SQL connection
 * @throws  SQLException        Unable to access the Lucene index, or the
 *                              service is no longer active
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("KRS is no longer active");
    }
    // Take the update lock unless this thread already holds the write lock;
    // remember whether we took it so only our own acquisition is released.
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        if (indexPath == null || indexWriter == null) {
            // Initialization is done under the write lock; each field is
            // checked again here before it is assigned.
            indexLock.writeLock().lock();
            try {
                if (indexPath == null) {
                    // presumably sets indexPath from the database -- verify against getIndexPath
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    indexWriter = new IndexWriter(directory, config);
                    // Write (or replace) a single control document and commit before
                    // opening the reader -- presumably so that a brand-new index
                    // already has a commit point for DirectoryReader.open to read.
                    Document document = new Document();
                    document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                    indexWriter.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                    indexWriter.commit();
                    // NOTE(review): if any statement after the indexWriter assignment
                    // throws, indexWriter stays non-null while indexReader/indexSearcher
                    // are unset, and the outer check will skip this block on later calls.
                    indexReader = DirectoryReader.open(directory);
                    indexSearcher = new IndexSearcher(indexReader);
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        // Log, then rethrow as SQLException with the original exception as cause.
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}

From source file:l3.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised options: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (append to an existing index
 * instead of recreating it). Prints the usage text and exits with status 1 when
 * {@code -docs} is absent or an option is missing its value.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {

    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-index".equals(option) || "-docs".equals(option)) {
            // an option given as the last argument has no value to consume
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + option);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(option)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(option)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // always release the writer, even when indexing fails part-way
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:l4.CompoundIndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised options: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (append to an existing index
 * instead of recreating it). Prints the usage text and exits with status 1 when
 * {@code -docs} is absent or an option is missing its value.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-index".equals(option) || "-docs".equals(option)) {
            // an option given as the last argument has no value to consume
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + option);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(option)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(option)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        // TieredMergePolicy mergePolicy = new TieredMergePolicy();
        // iwc.setMergePolicy(mergePolicy.setNoCFSRatio(1.0d));

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // always release the writer, even when indexing fails part-way
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:lab_mri.SearchEngine.java

/**
 * Opens the index writer over {@code dir} using a {@code CustomAnalyzer}; an existing
 * index is appended to, otherwise a new one is created.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public void open() throws IOException {
    final IndexWriterConfig appendConfig =
            new IndexWriterConfig(Version.LATEST, new CustomAnalyzer())
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writer = new IndexWriter(FSDirectory.open(dir), appendConfig);
}

From source file:lia.chapter2.IndexingTest.java

License:Apache License

/**
 * Builds a writer over {@code directory} with a {@code StandardAnalyzer}, opened in
 * {@code CREATE} mode.
 *
 * @return a new IndexWriter
 * @throws IOException if the writer cannot be opened
 */
private IndexWriter getWriter() throws IOException {
    return new IndexWriter(directory,
            new IndexWriterConfig(new StandardAnalyzer())
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
}

From source file:lia.recent.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised options: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (defaults to {@code "data"}) and {@code -update} (append to an
 * existing index instead of recreating it). Prints the usage text and exits with status 1
 * when an option is missing its value.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    // docsPath always has a value, so no "missing -docs" check is needed below
    String docsPath = "data";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-index".equals(option) || "-docs".equals(option)) {
            // an option given as the last argument has no value to consume
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + option);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(option)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(option)) {
            create = false;
        }
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // always release the writer, even when indexing fails part-way
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:lsre.utils.LuceneUtils.java

License:Open Source License

/**
 * Creates an IndexWriter for given index path, with given analyzer.
 *
 * @param directory the path to the index directory
 * @param create    set to true if you want to create a new index
 * @param analyzer  gives the analyzer used for the Indexwriter.
 * @return an IndexWriter/*from ww w  .j a  v  a  2  s.c om*/
 * @throws IOException
 */
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer)
        throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer
    else if (analyzer == AnalyzerType.KeywordAnalyzer)
        tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token.
    else if (analyzer == AnalyzerType.StandardAnalyzer)
        tmpAnalyzer = new StandardAnalyzer();

    // The config
    IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
    config.setRAMBufferSizeMB(512);
    config.setCommitOnClose(true);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.

    config.setCodec(new LsreCustomCodec());
    return new IndexWriter(directory, config);
}

From source file:lsre.utils.LuceneUtils.java

License:Open Source License

/**
 * Creates an IndexWriter for the given directory with the requested analyzer, a
 * caller-supplied RAM buffer size and the {@code LsreCustomCodec}.
 *
 * @param directory     the index directory to write to
 * @param create        {@code true} to overwrite an existing index, {@code false} to
 *                      append to it (creating it when none exists)
 * @param analyzer      selects the analyzer for the writer
 * @param RAMBufferSize RAM buffer size in MB, passed to {@code setRAMBufferSizeMB}
 * @return an IndexWriter
 * @throws IOException if the writer cannot be opened
 */
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer,
        double RAMBufferSize) throws IOException {
    // Set the analyzer according to the method params. All four analyzer types are
    // handled, matching the three-argument overload; previously KeywordAnalyzer and
    // StandardAnalyzer silently produced a null analyzer here.
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer) {
        tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter
    } else if (analyzer == AnalyzerType.WhitespaceAnalyzer) {
        tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer
    } else if (analyzer == AnalyzerType.KeywordAnalyzer) {
        tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token.
    } else if (analyzer == AnalyzerType.StandardAnalyzer) {
        tmpAnalyzer = new StandardAnalyzer();
    }

    // The config
    IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
    if (create) {
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    } else {
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.
    }
    config.setRAMBufferSizeMB(RAMBufferSize);
    config.setCodec(new LsreCustomCodec());
    return new IndexWriter(directory, config);
}

From source file:lucene.demo.search.FileIndexer.java

License:Apache License

/**
 * Indexes the documents under {@code docsPath} into the Lucene index at {@code indexPath}.
 *
 * <p>When {@code createMode} is set the index is recreated, otherwise documents are added
 * to the existing index. Exits the JVM with status 1 if the document directory does not
 * exist or is not readable. An {@link IOException} during indexing is reported on
 * standard output and not rethrown.
 */
public void index() {
    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);

            if (createMode) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }

            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer.  But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            // iwc.setRAMBufferSizeMB(256.0);

            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                indexDocs(writer, docDir);
            } finally {
                // Release the writer (and its write lock) even when indexing fails, so a
                // later call on this object can open the index again.
                writer.close();
            }
        } finally {
            dir.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}