Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

This page collects example usages of the org.apache.lucene.index IndexWriterConfig constructor IndexWriterConfig(Analyzer).

Prototype

public IndexWriterConfig(Analyzer analyzer) 

Document

Creates a new config with the provided Analyzer.
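
For orientation, here is a minimal, self-contained sketch of the constructor in use, in the same spirit as the examples below. The class name, the "body" field name, and the index path are illustrative assumptions, not taken from any of the projects listed here:

import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MinimalIndexWriterConfigExample {
    public static void main(String[] args) throws Exception {
        // The constructor takes the Analyzer that IndexWriter will use
        // to tokenize indexed text.
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // Create the index if it is missing, otherwise append to it.
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
        // IndexWriter is Closeable, so try-with-resources closes it.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
        }
        dir.close();
    }
}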

Usage

From source file:de.elbe5.cms.search.SearchBean.java

License:Open Source License

protected IndexWriter openIndexWriter(boolean create, String indexPath) throws Exception {
    ensureDirectory(indexPath);
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}

From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java

public void refreshIndex() {
    final Path docDir = Paths.get(docsDir);
    IndexWriter writer = null;
    try {

        Directory dir = FSDirectory.open(Paths.get(indexDir));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // Add new documents to an existing index:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size of the JVM (e.g. add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);
        writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);
        // The writer is closed in the finally block below.

    } catch (IOException e) {
        LOGGER.warn("Exception while indexing", e);

    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ex) {
                LOGGER.warn("Exception while closing index writer", ex);
            }
        }
    }
}

From source file:de.ks.flatadocdb.index.LuceneIndex.java

License:Apache License

public LuceneIndex(Repository repository, MetaModel metaModel, ExecutorService executorService)
        throws RuntimeException {
    super(repository, metaModel, executorService);
    try {
        Path resolve = repository.getPath().resolve(LUCENE_INDEX_FOLDER);
        Files.createDirectories(resolve);

        TimeProfiler profiler = new TimeProfiler("Lucene loading").start();
        try {
            this.directory = FSDirectory.open(resolve);
            analyzer = new StandardAnalyzer();
            IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
            cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, cfg);
            reopenIndexReader();
        } finally {
            profiler.stop().logDebug(log);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:de.ks.lucene.LuceneTaggingTest.java

License:Apache License

@Test
public void testTags() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));

    List<String> allTags = Arrays.asList("Bla Blubb", "Blubb", "Blubber Huhu", "Bla Huhu", "Haha");
    for (String tag : allTags) {
        Document doc = new Document();
        doc.add(new TextField("tags", tag, Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader directoryReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    // StandardAnalyzer lowercased the tags at index time, and TermQuery
    // is not analyzed, so the query term must be the lowercase "blubb".
    String term = "blubb";
    TermQuery termQuery = new TermQuery(new Term("tags", term));
    TopDocs search = searcher.search(termQuery, 50);
    log("TermQuery", searcher, search);

    FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("tags", term));
    search = searcher.search(fuzzyQuery, 50);
    log("FuzzyQuery", searcher, search);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("tags", "blubb")), BooleanClause.Occur.SHOULD);
    builder.add(new TermQuery(new Term("tags", "bla")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = builder.build();
    search = searcher.search(query, 50);
    log("BooleanQuery", searcher, search);
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

public LuceneIndexHandler(Configuration aConfiguration, AnalyzerCache aAnalyzerCache,
        ExecutorPool aExecutorPool, PreviewProcessor aPreviewProcessor) throws IOException {
    previewProcessor = aPreviewProcessor;
    configuration = aConfiguration;
    analyzerCache = aAnalyzerCache;
    executorPool = aExecutorPool;

    contentFieldType = new FieldType();
    contentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    contentFieldType.setStored(true);
    contentFieldType.setTokenized(true);
    contentFieldType.setStoreTermVectorOffsets(true);
    contentFieldType.setStoreTermVectorPayloads(true);
    contentFieldType.setStoreTermVectorPositions(true);
    contentFieldType.setStoreTermVectors(true);

    analyzer = analyzerCache.getAnalyzer();

    File theIndexDirectory = new File(aConfiguration.getConfigDirectory(), "index");
    theIndexDirectory.mkdirs();

    Directory theIndexFSDirectory = new NRTCachingDirectory(FSDirectory.open(theIndexDirectory.toPath()), 100,
            100);

    IndexWriterConfig theConfig = new IndexWriterConfig(analyzer);
    theConfig.setSimilarity(new CustomSimilarity());
    indexWriter = new IndexWriter(theIndexFSDirectory, theConfig);

    searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory());

    commitThread = new Thread("Lucene Commit Thread") {
        @Override
        public void run() {
            while (!isInterrupted()) {

                if (indexWriter.hasUncommittedChanges()) {
                    try {
                        indexWriter.commit();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }

                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // Catching InterruptedException clears the interrupt
                    // status; restore it so isInterrupted() ends the loop.
                    interrupt();
                }
            }
        }
    };
    commitThread.start();

    facetsConfig = new FacetsConfig();
}
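
The hand-rolled commit thread above could also be written with a ScheduledExecutorService, which handles the sleep/loop bookkeeping and interrupt handling. This is a sketch only: the commitScheduler field and startPeriodicCommits method are illustrative names, it reuses the indexWriter field from the constructor above, and the scheduler would still need to be shut down when the handler closes. It requires java.util.concurrent.Executors, ScheduledExecutorService, and TimeUnit:

private final ScheduledExecutorService commitScheduler = Executors.newSingleThreadScheduledExecutor();

private void startPeriodicCommits() {
    // Commit pending changes every two seconds, matching the thread above.
    commitScheduler.scheduleWithFixedDelay(() -> {
        try {
            if (indexWriter.hasUncommittedChanges()) {
                indexWriter.commit();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }, 2, 2, TimeUnit.SECONDS);
}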

From source file:de.mpii.microblogtrack.component.thirdparty.IndexWikipediaDump.java

License:Apache License

public static void constructIndex(String indexPath, String inputPath) throws UnsupportedEncodingException,
        IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    int threads = 16;
    WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build();
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    // the analyzer should match the one used at query time
    IndexWriterConfig iwc = new IndexWriterConfig(
            (Analyzer) Class.forName(Configuration.LUCENE_ANALYZER).newInstance());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(Configuration.LUCENE_MEM_SIZE);
    IndexWriter writer = new IndexWriter(dir, iwc);
    logger.info("Creating index at " + indexPath);
    logger.info("Indexing with " + threads + " threads");

    long startTime = System.currentTimeMillis();

    try {
        WikipediaXMLDumpInputStream stream = new WikipediaXMLDumpInputStream(inputPath);

        ExecutorService executor = Executors.newFixedThreadPool(threads);
        int cnt = 0;
        String page;
        while ((page = stream.readNext()) != null) {
            String title = cleaner.getTitle(page);

            // These are heuristics specifically for filtering out non-articles in enwiki-20120104.
            if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) {
                continue;
            }

            if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) {
                continue;
            }

            Runnable worker = new AddDocumentRunnable(writer, cleaner, page);
            executor.execute(worker);

            cnt++;
            if (cnt % 10000 == 0) {
                logger.info(cnt + " articles added");
            }

        }

        executor.shutdown();
        // Block until all queued indexing tasks have finished, instead of
        // busy-waiting on isTerminated().
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);

        logger.info("Total of " + cnt + " articles indexed.");

        logger.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception ex) {
        logger.error("", ex);
    } finally {
        writer.close();
        dir.close();
    }
}