List of usage examples for org.apache.lucene.index IndexWriter IndexWriter
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Parameter: conf — the configuration settings according to which the IndexWriter should be initialized.
. From source file:com.gmail.mosoft521.luceneDemo.IndexFiles.java
License:Apache License
/** * Index all text files under a directory. *//*from www . ja va 2 s . c om*/ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.google.gerrit.lucene.SubIndex.java
License:Apache License
/**
 * Opens a single Lucene sub-index and wires up its supporting machinery:
 * a delegate writer (plain, auto-commit, or periodically-committed depending
 * on {@code writerConfig.getCommitWithinMs()}), a searcher manager, and an
 * NRT controlled-reopen thread.
 *
 * @param dir the Lucene directory backing this sub-index
 * @param dirName human-readable name, used for thread names and log messages
 * @param writerConfig Gerrit-level writer configuration wrapper
 * @param searcherFactory factory for the searchers produced on refresh
 * @throws IOException if the underlying IndexWriter cannot be opened
 */
SubIndex(Directory dir, final String dirName, GerritIndexWriterConfig writerConfig,
        SearcherFactory searcherFactory) throws IOException {
    this.dir = dir;
    IndexWriter delegateWriter;
    long commitPeriod = writerConfig.getCommitWithinMs();
    if (commitPeriod < 0) {
        // Negative period: plain writer, commits only when explicitly requested.
        delegateWriter = new IndexWriter(dir, writerConfig.getLuceneConfig());
    } else if (commitPeriod == 0) {
        // Zero period: commit immediately after every change.
        delegateWriter = new AutoCommitWriter(dir, writerConfig.getLuceneConfig(), true);
    } else {
        // Positive period: commit on a fixed schedule from a daemon thread.
        final AutoCommitWriter autoCommitWriter =
                new AutoCommitWriter(dir, writerConfig.getLuceneConfig(), false);
        delegateWriter = autoCommitWriter;

        new ScheduledThreadPoolExecutor(1,
                new ThreadFactoryBuilder().setNameFormat("Commit-%d " + dirName).setDaemon(true).build())
            .scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    try {
                        if (autoCommitWriter.hasUncommittedChanges()) {
                            autoCommitWriter.manualFlush();
                            autoCommitWriter.commit();
                        }
                    } catch (IOException e) {
                        log.error("Error committing Lucene index " + dirName, e);
                    } catch (OutOfMemoryError e) {
                        log.error("Error committing Lucene index " + dirName, e);
                        try {
                            autoCommitWriter.close();
                        } catch (IOException e2) {
                            // Fixed: log the close failure itself (e2). Previously this
                            // logged the OOM error a second time, hiding the actual
                            // exception thrown by close().
                            log.error("SEVERE: Error closing Lucene index " + dirName
                                    + " after OOM; index may be corrupted.", e2);
                        }
                    }
                }
            }, commitPeriod, commitPeriod, MILLISECONDS);
    }
    writer = new TrackingIndexWriter(delegateWriter);
    searcherManager = new WrappableSearcherManager(writer.getIndexWriter(), true, searcherFactory);

    notDoneNrtFutures = Sets.newConcurrentHashSet();

    reopenThread = new ControlledRealTimeReopenThread<>(writer, searcherManager,
            0.500 /* maximum stale age (seconds) */, 0.010 /* minimum stale age (seconds) */);
    reopenThread.setName("NRT " + dirName);
    reopenThread.setPriority(Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY));
    reopenThread.setDaemon(true);

    // This must be added after the reopen thread is created. The reopen thread
    // adds its own listener which copies its internally last-refreshed
    // generation to the searching generation. removeIfDone() depends on the
    // searching generation being up to date when calling
    // reopenThread.waitForGeneration(gen, 0), therefore the reopen thread's
    // internal listener needs to be called first.
    // TODO(dborowitz): This may have been fixed by
    // http://issues.apache.org/jira/browse/LUCENE-5461
    searcherManager.addListener(new RefreshListener() {
        @Override
        public void beforeRefresh() throws IOException {
        }

        @Override
        public void afterRefresh(boolean didRefresh) throws IOException {
            for (NrtFuture f : notDoneNrtFutures) {
                f.removeIfDone();
            }
        }
    });

    reopenThread.start();
}
From source file:com.google.gerrit.server.change.ReviewerSuggestionCache.java
License:Apache License
private IndexSearcher index() throws IOException, OrmException { RAMDirectory idx = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer(CharArraySet.EMPTY_SET)); config.setOpenMode(OpenMode.CREATE); try (IndexWriter writer = new IndexWriter(idx, config)) { for (Account a : db.get().accounts().all()) { if (a.isActive()) { addAccount(writer, a);/*from w w w . java 2 s . c om*/ } } } return new IndexSearcher(DirectoryReader.open(idx)); }
From source file:com.gprasad.searchwithlucene.Indexer.java
private static void createIndex(String indexPath) throws IOException { Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); }
From source file:com.graphhopper.compare.misc.LuceneStorage.java
License:Apache License
public boolean init(boolean forceCreate) { try {//from ww w. ja va 2 s. c o m File file = new File("osm.lucene.test"); if (forceCreate) Helper.deleteDir(file); // germany.osm => 3.6 GB on disc for nodes only, 1.5 GB memory usage at the end of the nodes Directory dir = FSDirectory.open(file); IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer()); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMaxMergeMB(3000); cfg.setRAMBufferSizeMB(128); cfg.setTermIndexInterval(512); cfg.setMergePolicy(mp); // specify different formats for id fields etc // -> this breaks 16 of our tests!? Lucene Bug? // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // return new Pulsing40PostingsFormat(); // } // }); // cfg.setMaxThreadStates(8); boolean create = !IndexReader.indexExists(dir); cfg.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); writer = new IndexWriter(dir, cfg); return true; } catch (Exception ex) { logger.error("cannot init lucene storage", ex); return false; } }
From source file:com.greplin.interval.BaseIntervalQueryTest.java
License:Apache License
@Before public void setUp() throws IOException { RAMDirectory ramDirectory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new SimpleAnalyzer(Version.LUCENE_35)); this.indexWriter = new IndexWriter(ramDirectory, config); }
From source file:com.greplin.lucene.filter.PhraseFilterBenchmark.java
License:Apache License
/**
 * Benchmark entry point: builds a multi-segment in-memory index of random
 * documents, then alternates timing PhraseFilter against PhraseQuery over the
 * same random phrase queries and prints the relative timings.
 */
public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        // Build NUMBER_OF_SEGMENTS segments totalling exactly TOTAL_DOCS docs;
        // each segment gets a random share, the last one takes the remainder.
        int done = 0;
        for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
            int remaining = NUMBER_OF_SEGMENTS - i;
            int numberOfDocs;
            if (remaining == 1) {
                numberOfDocs = TOTAL_DOCS - done;
            } else {
                numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
            }
            done += numberOfDocs;
            System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
            for (int d = 0; d < numberOfDocs; d++) {
                // Random doc length centered on AVERAGE_WORDS_PER_DOC.
                int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC
                        - WORDS_PER_DOC_DEVIATION;
                Document doc = new Document();
                doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES,
                        Field.Index.ANALYZED));
                // "second" field matches in roughly SECOND_FIELD_MATCH_PERCENTAGE of docs.
                doc.add(new Field("second",
                        RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no",
                        Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            // Commit per iteration to force a segment boundary.
            writer.commit();
        }
        writer.close();

        IndexReader reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Pre-generate the same phrase queries in both representations so the
        // two timed code paths search identical phrases.
        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];
        for (int q = 0; q < TOTAL_QUERIES; q++) {
            queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
            terms[q] = new Term[queries[q].length];
            for (int qw = 0; qw < queries[q].length; qw++) {
                terms[q][qw] = new Term(FIELD, queries[q][qw]);
            }
        }

        // Warm up.
        new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);

        for (int round = 0; round < ROUNDS; round++) {
            System.out.println();
            String name1 = "filter";
            String name2 = "query";
            long ms1 = 0, ms2 = 0;
            // Two steps per round; (round & 1) alternates which variant runs
            // first so ordering effects average out across rounds.
            for (int step = 0; step < 2; step++) {
                System.gc();
                System.gc();
                System.gc();
                if (step == (round & 1)) {
                    // Variant 1: PhraseFilter wrapped in a FilteredQuery.
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (String[] queryWords : queries) {
                        PhraseFilter pf = new PhraseFilter(
                                new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))),
                                FIELD, queryWords);
                        hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                    }
                    ms1 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                } else {
                    // Variant 2: classic PhraseQuery combined with a TermQuery.
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (Term[] queryTerms : terms) {
                        PhraseQuery pq = new PhraseQuery();
                        for (Term term : queryTerms) {
                            pq.add(term);
                        }
                        Query query = BooleanQueryBuilder.builder()
                                .must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                        hits += searcher.search(query, 1).totalHits;
                    }
                    ms2 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                }
            }
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.greplin.lucene.query.PredicateBonusQueryTest.java
License:Apache License
@Test public void testBasics() throws Exception { IndexWriter writer = new IndexWriter(this.directory, new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35))); writer.addDocument(new DocumentBuilder().add("value", "5").build()); writer.close();/*from ww w .j av a 2 s .c o m*/ IndexReader reader = IndexReader.open(this.directory); IndexSearcher searcher = new IndexSearcher(reader); Query query = new ConstantScoreQuery(new TermQuery(new Term("value", "5"))); Assert.assertEquals(1.0, searcher.search(query, 1).getMaxScore(), 0.00001); Query noBonus = new PredicateBonusQuery(query, Predicates.NONE, 10.0f); Assert.assertEquals(1.0, searcher.search(noBonus, 1).getMaxScore(), 0.00001); Query bonus = new PredicateBonusQuery(query, Predicates.ALL, 100.0f); Assert.assertEquals(101.0, searcher.search(bonus, 1).getMaxScore(), 0.00001); Query noMatch = new TermQuery(new Term("value", "not5")); Assert.assertEquals(Double.NaN, searcher.search(noMatch, 1).getMaxScore(), 0.00001); Query noMatchNoBonus = new PredicateBonusQuery(noMatch, Predicates.NONE, 10.0f); Assert.assertEquals(Double.NaN, searcher.search(noMatchNoBonus, 1).getMaxScore(), 0.00001); Query noMatchIgnoresBonus = new PredicateBonusQuery(noMatch, Predicates.ALL, 100.0f); Assert.assertEquals(Double.NaN, searcher.search(noMatchIgnoresBonus, 1).getMaxScore(), 0.00001); }
From source file:com.helger.pd.indexer.lucene.PDLucene.java
License:Apache License
public PDLucene() throws IOException { // Where to store the index files final Path aPath = getLuceneIndexDir().toPath(); m_aDir = FSDirectory.open(aPath);/*from w ww .j ava 2 s.c o m*/ // Analyzer to use m_aAnalyzer = createAnalyzer(); // Create the index writer final IndexWriterConfig aWriterConfig = new IndexWriterConfig(m_aAnalyzer); aWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); m_aIndexWriter = new IndexWriter(m_aDir, aWriterConfig); // Reader and searcher are opened on demand s_aLogger.info("Lucene index operating on " + aPath); }
From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.LuceneIndexer.java
License:Open Source License
/** * Get the index writer in order to perform adding the documents to the index file. Initialize the * index writer if it hasn't been created. * /*from w ww.ja v a2 s.com*/ * @return the index writer which can add the documents to the index * @throws IngestionException */ private IndexWriter getIndexWriter() throws IngestionException { if (indexWriter == null) { try { IndexWriterConfig config = new IndexWriterConfig(SingletonAnalyzer.getAnalyzer()); indexWriter = new IndexWriter(indexDir, config); } catch (IOException e) { logger.fatal(Messages.getString("RetrieveAndRank.DIR_OPEN_FAIL")); //$NON-NLS-1$ throw new IngestionException(e); } } return indexWriter; }