Usage examples for the org.apache.lucene.index.IndexWriterConfig(Analyzer) constructor
public IndexWriterConfig(Analyzer analyzer)
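A minimal, self-contained sketch of the constructor before the per-project examples below; the index path and field name are placeholders for illustration, not taken from any of the listed sources.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MinimalIndexWriterConfigExample {
    public static void main(String[] args) throws Exception {
        // On-disk directory that will hold the index (placeholder path).
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));

        // The analyzer passed to the constructor is used for all tokenized fields.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // IndexWriter is Closeable, so try-with-resources closes it cleanly.
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}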
From source file: de.elbe5.cms.search.SearchBean.java
License: Open Source License
protected IndexWriter openIndexWriter(boolean create, String indexPath) throws Exception {
    ensureDirectory(indexPath);
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
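A hedged sketch of how a caller might use the helper above; the index path and field name are assumptions for illustration, not part of SearchBean.

IndexWriter writer = openIndexWriter(true, "/var/lib/cms/searchindex");   // assumed path
try {
    Document doc = new Document();
    doc.add(new TextField("content", "page text to index", Field.Store.NO));   // assumed field name
    writer.addDocument(doc);
    writer.commit();
} finally {
    writer.close();
}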
From source file: de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
public void refreshIndex() {
    boolean create = true;
    final Path docDir = Paths.get(docsDir);
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // Add new documents to an existing index:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();
    } catch (IOException e) {
        LOGGER.warn("Exception while indexing", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ex) {
                java.util.logging.Logger.getLogger(DefaultLuceneController.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}
From source file: de.ks.flatadocdb.index.LuceneIndex.java
License: Apache License
public LuceneIndex(Repository repository, MetaModel metaModel, ExecutorService executorService) throws RuntimeException {
    super(repository, metaModel, executorService);
    try {
        Path resolve = repository.getPath().resolve(LUCENE_INDEX_FOLDER);
        Files.createDirectories(resolve);
        TimeProfiler profiler = new TimeProfiler("Lucene loading").start();
        try {
            this.directory = FSDirectory.open(resolve);
            analyzer = new StandardAnalyzer();
            IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
            cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, cfg);
            reopenIndexReader();
        } finally {
            profiler.stop().logDebug(log);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file: de.ks.lucene.LuceneTaggingTest.java
License: Apache License
@Test
public void testTags() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
    List<String> allTags = Arrays.asList("Bla Blubb", "Blubb", "Blubber Huhu", "Bla Huhu", "Haha");
    for (String tag : allTags) {
        Document doc = new Document();
        doc.add(new TextField("tags", tag, Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader directoryReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(directoryReader);

    String term = "blubb";
    TermQuery termQuery = new TermQuery(new Term("tags", term));
    TopDocs search = searcher.search(termQuery, 50);
    log("TermQuery", searcher, search);

    FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("tags", term));
    search = searcher.search(fuzzyQuery, 50);
    log("FuzzyQuery", searcher, search);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("tags", "blubb")), BooleanClause.Occur.SHOULD);
    builder.add(new TermQuery(new Term("tags", "bla")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = builder.build();
    search = searcher.search(query, 50);
    log("BooleanQuery", searcher, search);
}
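Note that TermQuery and FuzzyQuery bypass the analyzer, so the query term must match the indexed token exactly; StandardAnalyzer lowercases "Blubb" to "blubb" at index time, which is why the lowercase term matches here. A small sketch of reading back the stored "tags" field from the hits, assuming the same searcher and search variables as in the test:

for (ScoreDoc sd : search.scoreDocs) {
    Document hit = searcher.doc(sd.doc);                        // load stored fields for this hit
    System.out.println(hit.get("tags") + " score=" + sd.score);
}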
From source file: de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License: Open Source License
public LuceneIndexHandler(Configuration aConfiguration, AnalyzerCache aAnalyzerCache, ExecutorPool aExecutorPool, PreviewProcessor aPreviewProcessor) throws IOException {
    previewProcessor = aPreviewProcessor;
    configuration = aConfiguration;
    analyzerCache = aAnalyzerCache;
    executorPool = aExecutorPool;

    contentFieldType = new FieldType();
    contentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    contentFieldType.setStored(true);
    contentFieldType.setTokenized(true);
    contentFieldType.setStoreTermVectorOffsets(true);
    contentFieldType.setStoreTermVectorPayloads(true);
    contentFieldType.setStoreTermVectorPositions(true);
    contentFieldType.setStoreTermVectors(true);

    analyzer = analyzerCache.getAnalyzer();

    File theIndexDirectory = new File(aConfiguration.getConfigDirectory(), "index");
    theIndexDirectory.mkdirs();

    Directory theIndexFSDirectory = new NRTCachingDirectory(FSDirectory.open(theIndexDirectory.toPath()), 100, 100);
    IndexWriterConfig theConfig = new IndexWriterConfig(analyzer);
    theConfig.setSimilarity(new CustomSimilarity());

    indexWriter = new IndexWriter(theIndexFSDirectory, theConfig);
    searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory());

    commitThread = new Thread("Lucene Commit Thread") {
        @Override
        public void run() {
            while (!isInterrupted()) {
                if (indexWriter.hasUncommittedChanges()) {
                    try {
                        indexWriter.commit();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // Do nothing here
                }
            }
        }
    };
    commitThread.start();

    facetsConfig = new FacetsConfig();
}
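A hedged sketch of the query side that the SearcherManager above enables; the field name and query term are illustrative, not taken from LuceneIndexHandler.

searcherManager.maybeRefresh();                        // pick up changes committed by the commit thread
IndexSearcher searcher = searcherManager.acquire();    // ref-counted searcher
try {
    TopDocs hits = searcher.search(new TermQuery(new Term("content", "lucene")), 10);
    // ... render hits ...
} finally {
    searcherManager.release(searcher);                 // always release what was acquired
    searcher = null;
}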
From source file: de.mpii.microblogtrack.component.thirdparty.IndexWikipediaDump.java
License: Apache License
public static void constructIndex(String indexPath, String inputPath) throws UnsupportedEncodingException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    int threads = 16;
    WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build();

    Directory dir = FSDirectory.open(Paths.get(indexPath));
    // the analyzer should be the same as the runtime analyzer
    IndexWriterConfig iwc = new IndexWriterConfig(
            (Analyzer) Class.forName(Configuration.LUCENE_ANALYZER).newInstance());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(Configuration.LUCENE_MEM_SIZE);
    IndexWriter writer = new IndexWriter(dir, iwc);

    logger.info("Creating index at " + indexPath);
    logger.info("Indexing with " + threads + " threads");
    long startTime = System.currentTimeMillis();

    try {
        WikipediaXMLDumpInputStream stream = new WikipediaXMLDumpInputStream(inputPath);
        ExecutorService executor = Executors.newFixedThreadPool(threads);

        int cnt = 0;
        String page;
        while ((page = stream.readNext()) != null) {
            String title = cleaner.getTitle(page);
            // These are heuristics specifically for filtering out non-articles in enwiki-20120104.
            if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) {
                continue;
            }
            if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) {
                continue;
            }

            Runnable worker = new AddDocumentRunnable(writer, cleaner, page);
            executor.execute(worker);

            cnt++;
            if (cnt % 10000 == 0) {
                logger.info(cnt + " articles added");
            }
        }

        executor.shutdown();
        // Wait until all threads are finished
        while (!executor.isTerminated()) {
        }

        logger.info("Total of " + cnt + " articles indexed.");
        logger.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception ex) {
        logger.error("", ex);
    } finally {
        writer.close();
        dir.close();
    }
}
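The AddDocumentRunnable used above is not shown in this snippet; a hypothetical sketch of such a worker, with assumed field names, could look like the following (IndexWriter.addDocument is thread-safe, which is what makes the fixed thread pool above work).

// Hypothetical worker; the real AddDocumentRunnable is not shown here,
// so the field names and cleaning calls are assumptions for illustration.
class AddDocumentRunnable implements Runnable {
    private final IndexWriter writer;
    private final WikiClean cleaner;
    private final String page;

    AddDocumentRunnable(IndexWriter writer, WikiClean cleaner, String page) {
        this.writer = writer;
        this.cleaner = cleaner;
        this.page = page;
    }

    @Override
    public void run() {
        try {
            Document doc = new Document();
            doc.add(new TextField("title", cleaner.getTitle(page), Field.Store.YES));   // assumed field
            doc.add(new TextField("body", cleaner.clean(page), Field.Store.NO));        // assumed field
            writer.addDocument(doc);   // safe to call concurrently from multiple workers
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}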