Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies the OpenMode of the index.

Usage

From source file:natural.language.qa.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognized arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} - where the index is written (defaults to {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} - directory to index (required)</li>
 *   <li>{@code -update} - open the index with {@code CREATE_OR_APPEND} instead of {@code CREATE}</li>
 * </ul>
 *
 * <p>The process exits with status 1 when {@code -docs} is missing, when an option
 * that needs a value is the last argument, or when the document directory does not
 * exist or cannot be read.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            // TODO: Change the link with every release (or: fill in some less error-prone alternative here...)
            + "See http://lucene.apache.org/java/3_1/demo.html for details.";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        boolean isIndexOption = "-index".equals(arg);
        boolean isDocsOption = "-docs".equals(arg);
        if (isIndexOption || isDocsOption) {
            // Both options consume the following argument; without this check a
            // trailing "-index" or "-docs" would fail with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if (isIndexOption) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call optimize here.  This can be
            // a costly operation, so generally it's only worth
            // it when your index is relatively static (ie you're
            // done adding documents to it):
            //
            // writer.optimize();
        } finally {
            // Always close the writer, even when indexing fails part-way,
            // so it is not left open.
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:net.asteasolutions.cinusuidi.sluncho.documentIndex.DocumentIndex.java

/**
 * Opens the file-system directory at {@code indexPath} and assigns a new
 * {@code IndexWriter} on it to the {@code writer} field.
 *
 * <p>The writer uses a {@code StandardAnalyzer} and is opened in
 * {@code OpenMode.CREATE} mode.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public void init() throws IOException {
    final Directory indexDirectory = FSDirectory.open(Paths.get(indexPath));

    final IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(indexDirectory, writerConfig);
}

From source file:net.bobah.mail.Indexer.java

License:Apache License

/**
 * Indexes every {@code .eml} file found under the folder named by the
 * {@code net.bobah.mail.local.folder} property, then removes index entries whose
 * {@code path} no longer exists on disk.
 *
 * <p>The index lives in an {@code index} sub-directory of that folder and is opened
 * with {@code CREATE_OR_APPEND}. Files are handed to {@code executor} one task per
 * file. A reader and searcher are only opened when the index directory already
 * existed before this call; on a first run they are {@code null}, so the
 * stale-entry sweep is skipped.
 *
 * @throws IllegalArgumentException if the configured folder is missing or not a directory
 * @throws Exception on any indexing failure
 */
private void runEx() throws Exception {
    final File dir = new File(config.getProperty("net.bobah.mail.local.folder"));
    if (!dir.exists() || !dir.isDirectory()) {
        throw new IllegalArgumentException(String.format("\"%s\" does not exist or is not a directory", dir));
    }

    // Collect the *.eml files, ordered by last-modified timestamp.
    Collection<File> files = findFiles(dir, new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getName().endsWith(".eml");
        }
    }, new Comparator<File>() {
        @Override
        public int compare(File l, File r) {
            return Long.compare(l.lastModified(), r.lastModified());
        }
    });

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    final File indexDir = new File(dir, "index");

    final boolean indexExisted = indexDir.exists();
    if (!indexExisted)
        indexDir.mkdirs();

    final Directory idx = FSDirectory.open(indexDir);
    final IndexWriter writer = new IndexWriter(idx, iwc);

    // Reader/searcher are opened before this run's updates and only for a pre-existing index.
    final IndexReader reader = indexExisted ? DirectoryReader.open(idx) : null;
    final IndexSearcher searcher = indexExisted ? new IndexSearcher(reader) : null;

    //final AtomicLong counter = new AtomicLong(0l);
    try {
        for (final File file : files) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        index(file, writer, searcher);
                        //if (counter.incrementAndGet() % 100 == 0) writer.commit(); // TODO: VL: make batch size configurable
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            });
        }

        // NOTE(review): presumably blocks until all submitted tasks finish - confirm.
        shutdownExecutor(executor, log);

        writer.commit();
        log.info("committed index updates");

        // Without a pre-existing index there is no searcher (and nothing stale to
        // remove); calling searcher.search(...) here used to throw NullPointerException.
        if (searcher != null) {
            searcher.search(new MatchAllDocsQuery(), new Collector() {
                @Override
                public void setScorer(Scorer scorer) throws IOException {
                }

                @Override
                public void setNextReader(AtomicReaderContext unused) throws IOException {
                }

                @Override
                public void collect(int docID) throws IOException {
                    Document doc = reader.document(docID);
                    final String path = doc.get("path");
                    if (path != null) {
                        try {
                            final File file = new File(path);
                            if (!file.exists()) {
                                log.info("deleting index for {}", doc.get("id"));
                                writer.deleteDocuments(new Term("id", doc.get("id")));
                            }
                        } catch (SecurityException e) {
                            log.error("exception", e);
                        }
                    }
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                    return true;
                }
            });

            writer.commit();
            log.info("committed index deletions");
        }

    } finally {
        try {
            // close writer without commit (see explicit commits above)
            writer.rollback();
        } catch (IOException e) {
            log.error("exception while closing writer", e);
        }
        // The reader was previously never closed.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("exception while closing reader", e);
            }
        }
    }
}

From source file:net.faustinelli.javafx.ensemble.search.BuildEnsembleSearchIndex.java

License:Open Source License

/**
 * Builds the Ensemble search index and the two text listings that accompany it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Create a fresh Lucene index ({@code OpenMode.CREATE}) under
 *       {@code build/classes/ensemble/search/index}.</li>
 *   <li>Index the samples, then the API documentation; a failure while indexing the
 *       API documentation is reported as a warning and does not abort the build.</li>
 *   <li>Write {@code listAll.txt} into the index directory, one
 *       {@code fileName:length} line per index file.</li>
 *   <li>Write {@code samplesAll.txt}, one collected sample file name per line.</li>
 * </ol>
 *
 * @param args ignored
 * @throws Exception if indexing the samples or writing either listing fails
 */
public static void main(String[] args) throws Exception {
    File samplesFilesDir = new File("build/classes/ensemble/");
    File indexDir = new File("build/classes/ensemble/search/index");
    File docDir = new File("../../../artifacts/sdk/docs/api");
    File samplesDir = new File("src/ensemble/samples");
    // create index
    ///System.out.println("Indexing to directory '" + indexDir + "'...");
    long start = System.currentTimeMillis();
    Directory dir = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // generate and write index of all java doc and samples
    IndexWriter writer = new IndexWriter(dir, iwc);

    List<String> samplesFileList = new ArrayList<String>();

    indexSamples(writer, samplesDir, samplesFileList);
    try {
        indexJavaDocAllClasses(writer, docDir);
    } catch (Exception e) {
        // Deliberate best-effort: the API docs are optional for the search index.
        System.out.println(
                "\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n"
                        + "Ensemble search will not include the API documentation.\n");
    }
    writer.close();

    // create a listAll.txt file that is used
    FileWriter listAllOut = new FileWriter(new File(indexDir, "listAll.txt"));
    try {
        for (String fileName : dir.listAll()) {
            if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
                long length = dir.fileLength(fileName);
                listAllOut.write(fileName);
                listAllOut.write(':');
                listAllOut.write(Long.toString(length));
                listAllOut.write('\n');
            }
        }
        listAllOut.flush();
    } finally {
        // Close even when a write fails so the file handle is not leaked.
        listAllOut.close();
    }

    FileWriter sampleFilesCache = new FileWriter(
            new File(samplesFilesDir, "net/faustinelli/javafx/ensemble/samplesAll.txt"));
    try {
        for (String oneSample : samplesFileList) {
            sampleFilesCache.write(oneSample);
            sampleFilesCache.write('\n');
        }
        sampleFilesCache.flush();
    } finally {
        sampleFilesCache.close();
    }

    // print time taken
    ///System.out.println(System.currentTimeMillis() - start + " total milliseconds");
}

From source file:net.mad.ads.db.db.index.AdDBLuceneIndex.java

License:Open Source License

/**
 * Sets up a new in-memory index.
 *
 * <p>Creates a {@code RAMDirectory}, opens an {@code IndexWriter} on it in
 * {@code OpenMode.CREATE} mode with a {@code KeywordAnalyzer}, then opens a reader
 * from that writer and wraps it in an {@code IndexSearcher}.
 *
 * @throws IOException if the writer or reader cannot be opened
 */
@Override
public void open() throws IOException {
    this.index = new RAMDirectory();

    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_33, new KeywordAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    this.writer = new IndexWriter(this.index, writerConfig);

    this.reader = IndexReader.open(this.writer, true);
    this.searcher = new IndexSearcher(this.reader);
}

From source file:net.oneandone.pommes.model.Database.java

License:Apache License

/**
 * Deletes every document whose {@code ORIGIN} term starts with one of the given prefixes.
 *
 * <p>Calls {@code close()} first (NOTE(review): presumably releases whatever this
 * instance currently holds open on the index - confirm), then opens a writer in
 * {@code APPEND} mode with no analyzer and issues one prefix-delete per entry.
 * The writer is always closed, even when a delete fails.
 *
 * @param prefixes origin prefixes whose documents are to be removed
 * @throws IOException if the index cannot be opened or modified
 */
public void remove(List<String> prefixes) throws IOException {
    IndexWriter writer;
    IndexWriterConfig config;

    close();
    config = new IndexWriterConfig(Version.LUCENE_4_9, null);
    config.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writer = new IndexWriter(getIndexLuceneDirectory(), config);
    try {
        for (String prefix : prefixes) {
            writer.deleteDocuments(new PrefixQuery(new Term(ORIGIN, prefix)));
        }
    } finally {
        // Close on every path so a failed delete does not leave the writer open.
        writer.close();
    }
}

From source file:net.oneandone.pommes.model.Database.java

License:Apache License

/**
 * Adds or replaces the documents supplied by the iterator.
 *
 * <p>Each document is written with {@code updateDocument}, keyed by its
 * {@code ORIGIN} value. Calls {@code close()} first (NOTE(review): presumably
 * releases whatever this instance currently holds open on the index - confirm),
 * then opens a writer in {@code CREATE_OR_APPEND} mode. The writer is always
 * closed, even when the iterator or an update fails.
 *
 * @param iterator source of the documents to index
 * @throws IOException if the index cannot be opened or modified
 */
public void index(Iterator<Document> iterator) throws IOException {
    IndexWriter writer;
    IndexWriterConfig config;
    Document doc;

    close();
    // no analyzer, I have String fields only
    config = new IndexWriterConfig(Version.LUCENE_4_9, null);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writer = new IndexWriter(getIndexLuceneDirectory(), config);
    try {
        while (iterator.hasNext()) {
            doc = iterator.next();
            writer.updateDocument(new Term(ORIGIN, doc.get(ORIGIN)), doc);
        }
    } finally {
        // Close on every path so a failed update does not leave the writer open.
        writer.close();
    }
}

From source file:net.ontopia.infoset.fulltext.impl.lucene.LuceneIndexer.java

License:Apache License

/**
 * Returns the shared {@code IndexWriter}, creating it on first use.
 *
 * <p>When no writer exists yet, any open {@code reader} is closed and cleared
 * first, and the new writer is opened on {@code dir} in {@code CREATE_OR_APPEND}
 * mode using the configured {@code analyzer}.
 *
 * @return the existing or newly created writer
 * @throws OntopiaRuntimeException wrapping any {@code IOException} raised while
 *         closing the reader or opening the writer
 */
protected IndexWriter getWriter() {
    // Fast path: a writer has already been created.
    if (writer != null) {
        return writer;
    }

    try {
        if (reader != null) {
            reader.close();
            reader = null;
        }
        final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, writerConfig);
    } catch (IOException e) {
        throw new OntopiaRuntimeException(e);
    }
    return writer;
}

From source file:net.riezebos.thoth.content.search.Indexer.java

License:Apache License

/**
 * Creates a new {@code IndexWriter} on the directory at {@code indexFolder},
 * using a {@code StandardAnalyzer}.
 *
 * @param wipeIndex when {@code true} the writer is opened with
 *        {@code OpenMode.CREATE}; otherwise with {@code OpenMode.CREATE_OR_APPEND}
 * @return a newly constructed writer
 * @throws IOException if the directory or the writer cannot be opened
 */
protected IndexWriter getWriter(boolean wipeIndex) throws IOException {
    final Directory indexDirectory = FSDirectory.open(Paths.get(indexFolder));

    final IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writerConfig.setOpenMode(wipeIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

    return new IndexWriter(indexDirectory, writerConfig);
}

From source file:net.semanticmetadata.lire.indexing.parallel.ParallelIndexer.java

License:Open Source License

/**
 * Runs one complete indexing pass.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Open an {@code IndexWriter} on {@code indexPath} with the configured
 *       {@code openMode} and a {@code LireCustomCodec}.</li>
 *   <li>Determine the files to index: every image under {@code imageDirectory}
 *       when {@code imageList} is {@code null}, otherwise the lines of
 *       {@code imageList} whose trimmed length exceeds 3.</li>
 *   <li>Start one {@code Producer} thread, {@code numberOfThreads}
 *       {@code Consumer} threads and one {@code Monitoring} thread, then wait for
 *       the consumers to finish.</li>
 *   <li>Print timing, commit, merge the index down to a single segment, close the
 *       writer and set {@code threadFinished}.</li>
 * </ol>
 *
 * <p>{@code IOException} and {@code InterruptedException} are printed to stderr
 * rather than propagated; on interruption the thread's interrupt flag is restored.
 */
public void run() {
    IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new StandardAnalyzer());
    config.setOpenMode(openMode);
    config.setCodec(new LireCustomCodec());
    try {
        if (imageDirectory != null)
            System.out.println("Getting all images in " + imageDirectory + ".");
        writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        if (imageList == null) {
            files = FileUtils.getAllImages(new File(imageDirectory), true);
        } else {
            files = new LinkedList<String>();
            BufferedReader br = new BufferedReader(new FileReader(imageList));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    String trimmed = line.trim();
                    // Lines of three characters or fewer are skipped, as before.
                    if (trimmed.length() > 3)
                        files.add(trimmed);
                }
            } finally {
                // The list file was previously never closed.
                br.close();
            }
        }
        numImages = files.size();
        System.out.printf("Indexing %d images\n", files.size());
        Thread p = new Thread(new Producer());
        p.start();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        long l = System.currentTimeMillis();
        for (int i = 0; i < numberOfThreads; i++) {
            Thread c = new Thread(new Consumer());
            c.start();
            threads.add(c);
        }
        Thread m = new Thread(new Monitoring());
        m.start();
        // Only the consumers are joined; producer and monitor are not waited on here.
        for (Thread consumer : threads) {
            consumer.join();
        }
        long l1 = System.currentTimeMillis() - l;
        int seconds = (int) (l1 / 1000);
        int minutes = seconds / 60;
        seconds = seconds % 60;
        System.out.printf("Analyzed %d images in %03d:%02d ~ %3.2f ms each.\n", overallCount, minutes, seconds,
                ((overallCount > 0) ? ((float) l1 / (float) overallCount) : -1f));
        writer.commit();
        writer.forceMerge(1);
        writer.close();
        threadFinished = true;
        // add local feature hist here
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}