Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Document

Specifies OpenMode of the index.
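
As a quick reference, the possible values are OpenMode.CREATE (create a new index, removing any existing one), OpenMode.APPEND (open an existing index, failing if none exists) and OpenMode.CREATE_OR_APPEND (open an existing index, or create one if absent). Below is a minimal, self-contained sketch using the Lucene 5+ style API; the "index" path and the StandardAnalyzer are placeholders chosen for illustration, not taken from the examples that follow.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenModeSketch {
    public static void main(String[] args) throws Exception {
        // placeholder index location; any writable directory will do
        Directory dir = FSDirectory.open(Paths.get("index"));
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        // CREATE           - build a new index, discarding existing segments
        // APPEND           - open an existing index, failing if there is none
        // CREATE_OR_APPEND - open an existing index, or create one if absent
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, config)) {
            // add or update documents here
        }
    }
}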

Usage

From source file:edu.virginia.cs.index.UserIndexer.java

/**
 * Creates the initial index files on disk
 *
 * @param indexPath path on disk where the index files should be created
 * @return an IndexWriter configured to create the index at that path
 * @throws IOException
 */
private static IndexWriter setupIndex(String indexPath) throws IOException {
    Analyzer analyzer = new SpecialAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(OpenMode.CREATE);
    config.setRAMBufferSizeMB(2048.0);

    FSDirectory dir = FSDirectory.open(new File(indexPath));
    IndexWriter writer = new IndexWriter(dir, config);

    return writer;
}

From source file:ensemble.compiletime.search.BuildEnsembleSearchIndex.java

License:Open Source License

public static void buildSearchIndex(List<Sample> allSamples, String javaDocBaseUrl,
        String javafxDocumentationHome, File indexDir) {
    try {
        List<Document> docs = new ArrayList<>();
        List<Callable<List<Document>>> tasks = new ArrayList<>();
        // create callables to collect data
        System.out.println("Creating Documents for Samples...");
        docs.addAll(indexSamples(allSamples));
        System.out.println("Creating tasks for getting all documentation...");
        tasks.addAll(indexJavaDocAllClasses(javaDocBaseUrl));
        tasks.addAll(indexAllDocumentation(javafxDocumentationHome));
        // execute all the tasks in 32 threads, collecting all the documents to write
        System.out.println("Executing tasks getting all documentation...");
        try {
            ThreadPoolExecutor executor = new ThreadPoolExecutor(32, 32, 30, TimeUnit.SECONDS,
                    new LinkedBlockingQueue<Runnable>());
            executor.setThreadFactory(new ThreadFactory() {
                int index = 0;

                @Override
                public Thread newThread(Runnable r) {
                    Thread thread = new Thread(r, "Thread-" + (++index));
                    thread.setDaemon(true);
                    return thread;
                }
            });
            List<Future<List<Document>>> results = executor.invokeAll(tasks);
            for (Future<List<Document>> future : results) {
                docs.addAll(future.get());
            }
        } catch (ExecutionException | InterruptedException ex) {
            Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
        }
        // create index
        System.out.println("Indexing to directory '" + indexDir + "'...");
        Directory dir = FSDirectory.open(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // write all docs
            System.out.println("Writing [" + docs.size() + "] documents to index....");
            writer.addDocuments(docs);
            // optimize the written index
            System.out.println("Optimizing search index....");
            writer.optimize();
            System.out.println("NUMBER OF INDEXED DOCUMENTS = [" + writer.numDocs() + "]");
        }
        // write file listing all the search index files, so we know what
        // is in the jar file at runtime
        try (FileWriter listAllOut = new FileWriter(new File(indexDir, "listAll.txt"))) {
            for (String fileName : dir.listAll()) {
                if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
                    Long length = dir.fileLength(fileName);
                    listAllOut.write(fileName);
                    listAllOut.write(':');
                    listAllOut.write(length.toString());
                    listAllOut.write('\n');
                }
            }
            listAllOut.flush();
        }
        System.out.println("Finished writing search index to directory '" + indexDir);
    } catch (IOException ex) {
        Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:ensemble.search.BuildEnsembleSearchIndex.java

License:Open Source License

public static void main(String[] args) throws Exception {
    /*        File samplesFilesDir = new File("build/classes/ensemble/");
            File indexDir = new File("build/classes/ensemble/search/index");
            File docDir = new File("../../../artifacts/sdk/docs/api");
            File samplesDir = new File("C:\Code\Sandbox\sandbox-repo\learning-java\fx\src\main\java\ensemble\samples");
    */
    File samplesFilesDir = new File("build/classes/ensemble/");
    File indexDir = new File("build/classes/ensemble/search/index");
    File docDir = new File("../../../artifacts/sdk/docs/api");
    File samplesDir = new File("src/ensemble/samples");
    // create index
    ///System.out.println("Indexing to directory '" + indexDir + "'...");
    long start = System.currentTimeMillis();
    Directory dir = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // generate and write index of all java doc and samples
    IndexWriter writer = new IndexWriter(dir, iwc);

    List<String> samplesFileList = new ArrayList<String>();

    indexSamples(writer, samplesDir, samplesFileList);
    try {
        indexJavaDocAllClasses(writer, docDir);
    } catch (Exception e) {
        System.out.println(
                "\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n"
                        + "Ensemble search will not include the API documentation.\n");
    }
    writer.close();
    // create a listAll.txt file listing the search index files, so we know what is in the jar at runtime
    FileWriter listAllOut = new FileWriter(new File(indexDir, "listAll.txt"));
    for (String fileName : dir.listAll()) {
        if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
            Long length = dir.fileLength(fileName);
            listAllOut.write(fileName);
            listAllOut.write(':');
            listAllOut.write(length.toString());
            listAllOut.write('\n');
        }
    }
    listAllOut.flush();
    listAllOut.close();

    FileWriter sampleFilesCache = new FileWriter(new File(samplesFilesDir, "samplesAll.txt"));
    for (String oneSample : samplesFileList) {
        sampleFilesCache.write(oneSample);
        sampleFilesCache.write('\n');
    }
    sampleFilesCache.flush();
    sampleFilesCache.close();

    // print time taken
    ///System.out.println(System.currentTimeMillis() - start + " total milliseconds");
}

From source file:es.ua.corpus.indexer.Indexer.java

protected void _initializeComponents(String indexPath) throws IOException {
    File indexDir = new File(indexPath);
    Directory directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(directory, config);
}

From source file:es.ua.labidiomas.corpus.index.Indexer.java

protected void _initializeComponents(String indexPath) throws IOException {
    File indexDir = new File(indexPath);
    directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setWriteLockTimeout(5000l);
    writer = new IndexWriter(directory, config);
}

From source file:es.ua.labidiomas.corpus.index.Indexer.java

private void deleteNgrams(String textID, String lang, String fileSeparator) throws IOException {
    for (int i = 1; i <= 4; i++) {
        File indexDir = new File(
                indexPath + fileSeparator + "ngrams" + fileSeparator + i + fileSeparator + lang);
        Directory directory = null;
        IndexWriter indexEraser = null;
        try {
            directory = FSDirectory.open(indexDir);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            config.setWriteLockTimeout(5000l);
            indexEraser = new IndexWriter(directory, config);
            Term term = new Term("textID", textID);
            indexEraser.deleteDocuments(term);
            indexEraser.commit();
        } finally {
            if (directory != null) {
                directory.close();
            }
            if (indexEraser != null) {
                indexEraser.close();
            }
        }
    }
}

From source file:es.unizar.iaaa.crawler.butler.index.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 */
public void index(String indexPath, File file) {

    if (file.exists()) {

        Date start = new Date();
        try {
            LOGGER.info("Indexing to directory '" + indexPath);
            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new EnglishAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);

            iwc.setOpenMode(OpenMode.CREATE);

            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            // iwc.setRAMBufferSizeMB(256.0);
            IndexWriter writer = new IndexWriter(dir, iwc);
            indexDocs(writer, file);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
            writer.close();

            Date end = new Date();
            LOGGER.info(end.getTime() - start.getTime() + " total milliseconds");

        } catch (IOException e) {
            LOGGER.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    } else {
        LOGGER.info(file.getName() + " does not exist");
    }
}

From source file:eu.eexcess.sourceselection.redde.indexer.BinaryIndexResource.java

License:Apache License

/**
 * opens the sample index for writing; overwrites existing one
 *
 * @param ramBufferSizeMB
 *            determines the amount of RAM that may be used for buffering
 * @throws IOException
 *             if unable to open/create index
 */
void openOutIndex(double ramBufferSizeMB) throws IOException {

    try {
        Directory indexDirectory = FSDirectory.open(new File(outIndexPath));
        Analyzer analyzer = new EnglishAnalyzer();
        IndexWriterConfig writerConfig = new IndexWriterConfig(luceneVersion, analyzer);
        writerConfig.setOpenMode(OpenMode.CREATE);
        writerConfig.setRAMBufferSizeMB(ramBufferSizeMB);
        outIndexWriter = new IndexWriter(indexDirectory, writerConfig);

    } catch (IOException e) {
        logger.log(Level.SEVERE, "unable to open/create index at [" + outIndexPath + "]", e);
        throw e;
    }
}

From source file:eu.eexcess.sourceselection.redde.indexer.TrecToLuceneIndexBuilder.java

License:Apache License

/**
 * Builds/overwrites existing Lucene index using TREC documents as source
 */
public void index() {
    Date startTimestamp = new Date();
    final File documentsDirectory = new File(documentsPath);

    if (!documentsDirectory.exists() || !documentsDirectory.canRead()) {

        logger.severe("cannot access document directory [" + documentsDirectory.getAbsolutePath() + "]");

    } else {

        try {
            logger.info("processing directory [" + documentsPath + "] to index [" + indexPath + "]");

            Directory indexDirectory = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new EnglishAnalyzer();
            IndexWriterConfig writerConfig = new IndexWriterConfig(luceneVersion, analyzer);

            writerConfig.setOpenMode(OpenMode.CREATE);
            writerConfig.setRAMBufferSizeMB(ramBufferSize);

            IndexWriter indexWriter = new IndexWriter(indexDirectory, writerConfig);
            indexDocs(indexWriter, documentsDirectory);

            indexWriter.commit();
            indexWriter.close();

            Date stopTimestamp = new Date();
            logger.info("processed [" + dirsCount + "] dirs [" + filesCount + "] files [" + documentsTotal
                    + "] documents [" + filesSkipped + "] files skipped in ["
                    + (stopTimestamp.getTime() - startTimestamp.getTime()) + "] ms]");

        } catch (IOException e) {
            logger.log(Level.SEVERE, "failed indexing documents", e);
        }
    }
}

From source file:examples.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {

    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";

    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}