Usage examples for the org.apache.lucene.index.IndexWriterConfig(Analyzer) constructor
public IndexWriterConfig(Analyzer analyzer)
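A minimal, self-contained sketch of the constructor before the per-project examples below; the index path and field name are placeholders for illustration, not taken from any of the listed sources.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MinimalIndexWriterConfigExample {
    public static void main(String[] args) throws Exception {
        // On-disk directory that will hold the index (placeholder path).
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));

        // The analyzer passed to the constructor is used for all tokenized fields.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // IndexWriter is Closeable, so try-with-resources closes it cleanly.
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}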
From source file: de.elbe5.cms.search.SearchBean.java
License: Open Source License
protected IndexWriter openIndexWriter(boolean create, String indexPath) throws Exception {
    ensureDirectory(indexPath);
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
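A hedged sketch of how a caller might use the helper above; the index path and field name are assumptions for illustration, not part of SearchBean.

IndexWriter writer = openIndexWriter(true, "/var/lib/cms/searchindex");   // assumed path
try {
    Document doc = new Document();
    doc.add(new TextField("content", "page text to index", Field.Store.NO));   // assumed field name
    writer.addDocument(doc);
    writer.commit();
} finally {
    writer.close();
}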
From source file: de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
public void refreshIndex() {
    boolean create = true;
    final Path docDir = Paths.get(docsDir);
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // Add new documents to an existing index:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();
    } catch (IOException e) {
        LOGGER.warn("Exception while indexing", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ex) {
                java.util.logging.Logger.getLogger(DefaultLuceneController.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}
From source file: de.ks.flatadocdb.index.LuceneIndex.java
License: Apache License
public LuceneIndex(Repository repository, MetaModel metaModel, ExecutorService executorService) throws RuntimeException {
    super(repository, metaModel, executorService);
    try {
        Path resolve = repository.getPath().resolve(LUCENE_INDEX_FOLDER);
        Files.createDirectories(resolve);
        TimeProfiler profiler = new TimeProfiler("Lucene loading").start();
        try {
            this.directory = FSDirectory.open(resolve);
            analyzer = new StandardAnalyzer();
            IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
            cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, cfg);
            reopenIndexReader();
        } finally {
            profiler.stop().logDebug(log);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file: de.ks.lucene.LuceneTaggingTest.java
License: Apache License
@Test
public void testTags() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
    List<String> allTags = Arrays.asList("Bla Blubb", "Blubb", "Blubber Huhu", "Bla Huhu", "Haha");
    for (String tag : allTags) {
        Document doc = new Document();
        doc.add(new TextField("tags", tag, Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader directoryReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(directoryReader);

    String term = "blubb";
    TermQuery termQuery = new TermQuery(new Term("tags", term));
    TopDocs search = searcher.search(termQuery, 50);
    log("TermQuery", searcher, search);

    FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("tags", term));
    search = searcher.search(fuzzyQuery, 50);
    log("FuzzyQuery", searcher, search);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("tags", "blubb")), BooleanClause.Occur.SHOULD);
    builder.add(new TermQuery(new Term("tags", "bla")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = builder.build();
    search = searcher.search(query, 50);
    log("BooleanQuery", searcher, search);
}
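Note that TermQuery and FuzzyQuery bypass the analyzer, so the query term must match the indexed token exactly; StandardAnalyzer lowercases "Blubb" to "blubb" at index time, which is why the lowercase term matches here. A small sketch of reading back the stored "tags" field from the hits, assuming the same searcher and search variables as in the test:

for (ScoreDoc sd : search.scoreDocs) {
    Document hit = searcher.doc(sd.doc);                        // load stored fields for this hit
    System.out.println(hit.get("tags") + " score=" + sd.score);
}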
From source file: de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License: Open Source License
public LuceneIndexHandler(Configuration aConfiguration, AnalyzerCache aAnalyzerCache, ExecutorPool aExecutorPool, PreviewProcessor aPreviewProcessor) throws IOException {
    previewProcessor = aPreviewProcessor;
    configuration = aConfiguration;
    analyzerCache = aAnalyzerCache;
    executorPool = aExecutorPool;

    contentFieldType = new FieldType();
    contentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    contentFieldType.setStored(true);
    contentFieldType.setTokenized(true);
    contentFieldType.setStoreTermVectorOffsets(true);
    contentFieldType.setStoreTermVectorPayloads(true);
    contentFieldType.setStoreTermVectorPositions(true);
    contentFieldType.setStoreTermVectors(true);

    analyzer = analyzerCache.getAnalyzer();

    File theIndexDirectory = new File(aConfiguration.getConfigDirectory(), "index");
    theIndexDirectory.mkdirs();

    Directory theIndexFSDirectory = new NRTCachingDirectory(FSDirectory.open(theIndexDirectory.toPath()), 100, 100);
    IndexWriterConfig theConfig = new IndexWriterConfig(analyzer);
    theConfig.setSimilarity(new CustomSimilarity());

    indexWriter = new IndexWriter(theIndexFSDirectory, theConfig);
    searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory());

    commitThread = new Thread("Lucene Commit Thread") {
        @Override
        public void run() {
            while (!isInterrupted()) {
                if (indexWriter.hasUncommittedChanges()) {
                    try {
                        indexWriter.commit();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // Do nothing here
                }
            }
        }
    };
    commitThread.start();

    facetsConfig = new FacetsConfig();
}
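A hedged sketch of the query side that the SearcherManager above enables; the field name and query term are illustrative, not taken from LuceneIndexHandler.

searcherManager.maybeRefresh();                        // pick up changes committed by the commit thread
IndexSearcher searcher = searcherManager.acquire();    // ref-counted searcher
try {
    TopDocs hits = searcher.search(new TermQuery(new Term("content", "lucene")), 10);
    // ... render hits ...
} finally {
    searcherManager.release(searcher);                 // always release what was acquired
    searcher = null;
}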
From source file: de.mpii.microblogtrack.component.thirdparty.IndexWikipediaDump.java
License: Apache License
public static void constructIndex(String indexPath, String inputPath) throws UnsupportedEncodingException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    int threads = 16;
    WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build();

    Directory dir = FSDirectory.open(Paths.get(indexPath));
    // the analyzer should be the same as the runtime analyzer
    IndexWriterConfig iwc = new IndexWriterConfig(
            (Analyzer) Class.forName(Configuration.LUCENE_ANALYZER).newInstance());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(Configuration.LUCENE_MEM_SIZE);
    IndexWriter writer = new IndexWriter(dir, iwc);

    logger.info("Creating index at " + indexPath);
    logger.info("Indexing with " + threads + " threads");
    long startTime = System.currentTimeMillis();

    try {
        WikipediaXMLDumpInputStream stream = new WikipediaXMLDumpInputStream(inputPath);
        ExecutorService executor = Executors.newFixedThreadPool(threads);

        int cnt = 0;
        String page;
        while ((page = stream.readNext()) != null) {
            String title = cleaner.getTitle(page);
            // These are heuristics specifically for filtering out non-articles in enwiki-20120104.
            if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) {
                continue;
            }
            if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) {
                continue;
            }

            Runnable worker = new AddDocumentRunnable(writer, cleaner, page);
            executor.execute(worker);

            cnt++;
            if (cnt % 10000 == 0) {
                logger.info(cnt + " articles added");
            }
        }

        executor.shutdown();
        // Wait until all threads are finished
        while (!executor.isTerminated()) {
        }

        logger.info("Total of " + cnt + " articles indexed.");
        logger.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception ex) {
        logger.error("", ex);
    } finally {
        writer.close();
        dir.close();
    }
}
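The AddDocumentRunnable used above is not shown in this snippet; a hypothetical sketch of such a worker, with assumed field names, could look like the following (IndexWriter.addDocument is thread-safe, which is what makes the fixed thread pool above work).

// Hypothetical worker; the real AddDocumentRunnable is not shown here,
// so the field names and cleaning calls are assumptions for illustration.
class AddDocumentRunnable implements Runnable {
    private final IndexWriter writer;
    private final WikiClean cleaner;
    private final String page;

    AddDocumentRunnable(IndexWriter writer, WikiClean cleaner, String page) {
        this.writer = writer;
        this.cleaner = cleaner;
        this.page = page;
    }

    @Override
    public void run() {
        try {
            Document doc = new Document();
            doc.add(new TextField("title", cleaner.getTitle(page), Field.Store.YES));   // assumed field
            doc.add(new TextField("body", cleaner.clean(page), Field.Store.NO));        // assumed field
            writer.addDocument(doc);   // safe to call concurrently from multiple workers
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}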