List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:de.citec.lucene.CreateIndex.java
/**
 * Command-line entry point: indexes the configured corpus file(s) into a Lucene index.
 *
 * <p>Input file, index location and language are hard-coded below. The writer is opened with
 * {@code OpenMode.CREATE}. A file that fails with an {@link IOException} is logged at
 * {@code SEVERE} and skipped; the remaining files are still processed.
 *
 * @param args unused
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");

    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;
    Directory dir = FSDirectory.open(Paths.get(indexPath));

    // Choose the analyzer matching the configured language.
    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    }
    if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    }
    if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);

    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (String file : files) {
            try {
                indexDocs(writer, Paths.get(file));
            } catch (IOException ex) {
                // Best effort: report the failing file and carry on with the next one.
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    System.out.println(counter);
}
From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java
public static void main(String[] args) throws IOException { Analyzer analyzer = null;//ww w .ja v a2 s.c o m List<String> files = new ArrayList<>(); files.add("/Users/swalter/Downloads/german_sentences_reduced.txt"); String indexPath = "/Users/swalter/Index/GermanIndexReduced/"; Language language = Language.DE; Directory dir = FSDirectory.open(Paths.get(indexPath)); //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU"); //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index"; //Language language = Language.JA; //Directory dir = FSDirectory.open(Paths.get(indexPath)); if (language.equals(Language.DE)) analyzer = new GermanAnalyzer(); if (language.equals(Language.ES)) analyzer = new SpanishAnalyzer(); if (language.equals(Language.EN)) analyzer = new EnglishAnalyzer(); if (language.equals(Language.JA)) analyzer = new JapaneseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(12000); try (IndexWriter writer = new IndexWriter(dir, iwc)) { files.forEach(f -> { try { indexDocs(writer, Paths.get(f), language); } catch (IOException ex) { Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex); } }); } }
From source file:de.csw.linkgenerator.plugin.lucene.IndexUpdater.java
License:Open Source License
private void openWriter(OpenMode openMode) { if (writer != null) { LOG.error("Writer already open and createWriter called"); return;// w w w .j a v a 2 s . co m } try { // fix for windows by Daniel Cortes: // FSDirectory f = FSDirectory.getDirectory(indexDir); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer); conf.setOpenMode(openMode); // Ralph: This is kind of guesswork LogDocMergePolicy mergePolicy = new LogDocMergePolicy(); mergePolicy.setUseCompoundFile(true); conf.setMergePolicy(mergePolicy); // writer = new IndexWriter (indexDir, analyzer, create); writer = new IndexWriter(indexDir, conf); // writer.setUseCompoundFile(true); if (LOG.isDebugEnabled()) { LOG.debug("successfully opened index writer : " + indexDir); } } catch (IOException e) { LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e); } }
From source file:de.dfki.km.leech.lucene.ToLuceneContentHandler.java
License:Open Source License
synchronized protected IndexWriter getCurrentWriter() throws CorruptIndexException, LockObtainFailedException, IOException { if (getSplitAndMergeIndex() <= 0) return m_initialLuceneWriter; if (m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) return m_luceneWriter; Directory directory = m_initialLuceneWriter.getDirectory(); Path fOurTmpDir = null;//from w w w . j av a2 s. c om if (directory instanceof FSDirectory) { if (m_luceneWriter != m_initialLuceneWriter) m_llIndexWriter2Close.add(m_luceneWriter); String strTmpPath = ((FSDirectory) directory).getDirectory().toAbsolutePath().toString(); // if(strTmpPath.charAt(strTmpPath.length() - 1) == '/' || strTmpPath.charAt(strTmpPath.length() - 1) == '\\') // strTmpPath = strTmpPath.substring(0, strTmpPath.length() - 1); strTmpPath += "_" + (m_hsTmpLuceneWriterPaths2Merge.size() + 1); fOurTmpDir = Paths.get(strTmpPath); } else { // wir brauchen was temporres File parentDir = new File(System.getProperty("java.io.tmpdir")); fOurTmpDir = Paths.get(parentDir.getAbsolutePath() + "/leechTmp/" + UUID.randomUUID().toString().replaceAll("\\W", "_")); } Logger.getLogger(ToLuceneContentHandler.class.getName()).info("Current index exceeds " + m_iSplitIndexDocumentCount + " documents. Will create another temporary one under " + fOurTmpDir); @SuppressWarnings("deprecation") IndexWriterConfig config = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer()); config.setOpenMode(OpenMode.CREATE); m_luceneWriter = new IndexWriter(new SimpleFSDirectory(fOurTmpDir), config); m_hsTmpLuceneWriterPaths2Merge.add(fOurTmpDir.toAbsolutePath().toString()); return m_luceneWriter; }
From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java
License:Open Source License
/** * Creates all writer, reader, and searcher objects if necessary * /* w ww . j av a 2s . com*/ * @throws CorruptIndexException * @throws LockObtainFailedException * @throws IOException */ public void openLuceneStuff() throws CorruptIndexException, LockObtainFailedException, IOException { if (m_indexWriter == null) { IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer()); config.setOpenMode(OpenMode.CREATE_OR_APPEND); m_indexWriter = new IndexWriter(new SimpleFSDirectory(Paths.get(m_strHistoryPath)), config); } if (m_indexReader == null) m_indexReader = DirectoryReader.open(m_indexWriter, true, true); if (m_indexSearcher == null) m_indexSearcher = new IndexSearcher(m_indexReader); }
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
protected IndexWriter openIndexWriter(boolean create, String indexPath) throws Exception { ensureDirectory(indexPath);//from ww w .j a v a2 s. c o m Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } return new IndexWriter(dir, iwc); }
From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
public void refreshIndex() { boolean create = true; final Path docDir = Paths.get(docsDir); IndexWriter writer = null;// ww w .jav a 2 s . c om try { Directory dir = FSDirectory.open(Paths.get(indexDir)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); } catch (IOException e) { LOGGER.warn("Exception while indexing", e); } finally { if (writer != null) { try { writer.close(); } catch (IOException ex) { java.util.logging.Logger.getLogger(DefaultLuceneController.class.getName()).log(Level.SEVERE, null, ex); } } } }
From source file:de.jetsli.lumeo.LucPerfTest.java
License:Apache License
public void testPerf() { new PerfRunner(1000000, 26f) { @Override/* w ww. j av a 2s . c om*/ public void reinit() throws Exception { super.reinit(); if (nrtManager != null) { nrtManager.close(); reopenThread.close(); writer.waitForMerges(); writer.close(); dir.close(); } Helper.deleteDir(file); docs = 0; IndexWriterConfig cfg = new IndexWriterConfig(version, keyAna); cfg.setRAMBufferSizeMB(128); // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // if ("_id".equals(field)) // return new Pulsing40PostingsFormat(); // else // return new Lucene40PostingsFormat(); // } // }); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setUseCompoundFile(false); cfg.setMergePolicy(mp); dir = FSDirectory.open(file); cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, cfg); nrtManager = new NRTManager(writer, new SearcherWarmer() { @Override public void warm(IndexSearcher s) throws IOException { // TODO get some random vertices via getVertices? 
} }); int priority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY); reopenThread = new NRTManagerReopenThread(nrtManager, 5.0, 0.03); reopenThread.setName("NRT Reopen Thread"); reopenThread.setPriority(priority); reopenThread.setDaemon(true); reopenThread.start(); } final BytesRef bytes = new BytesRef(); @Override public void innerRun(int trial, int i) { long id = i; Document newDoc = new Document(); NumericField idField = new NumericField("_id", 6, NumericField.TYPE_STORED).setLongValue(id); Field uIdField = new Field("_uid", "" + id, StringField.TYPE_STORED); Field typeField = new Field("_type", "test", StringField.TYPE_STORED); newDoc.add(idField); newDoc.add(uIdField); newDoc.add(typeField); // Analyzer ana = anas.get(newDoc.get("_type")); try { NumericUtils.longToPrefixCoded(id, 0, bytes); latestGen = nrtManager.updateDocument(new Term("_id", bytes), newDoc, keyAna); docs++; } catch (IOException ex) { logger.error("Cannot update " + i, ex); } } @Override protected void finalAssert() throws Exception { // logger.info("wait for " + latestGen + ", current:" + nrtManager.getCurrentSearchingGen(true)); nrtManager.waitForGeneration(latestGen, true); // writer.commit(); // writer.waitForMerges(); SearcherManager mng = nrtManager.getSearcherManager(true); // mng.maybeReopen(); IndexSearcher searcher = mng.acquire(); try { TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(new MatchAllDocsQuery(), coll); long total = coll.getTotalHits(); if (docs != total) throw new IllegalStateException(total + " vs. " + docs); } finally { nrtManager.getSearcherManager(true).release(searcher); } } }.run(); }
From source file:de.jetsli.lumeo.RawLucene.java
License:Apache License
public RawLucene init() { indexLock();/*from w w w. ja v a 2 s .c o m*/ try { if (closed) throw new IllegalStateException("Already closed"); if (writer != null) throw new IllegalStateException("Already initialized"); // release locks when started if (IndexWriter.isLocked(dir)) { logger.warn("index is locked + " + name + " -> releasing lock"); IndexWriter.unlock(dir); } IndexWriterConfig cfg = new IndexWriterConfig(VERSION, defaultMapping.getCombinedAnalyzer()); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMaxMergeMB(getMaxMergeMB()); cfg.setRAMBufferSizeMB(ramBufferSizeMB); cfg.setTermIndexInterval(termIndexIntervalSize); cfg.setMergePolicy(mp); // TODO specify different formats for id fields etc // -> this breaks 16 of our tests!? Lucene Bug? // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // return new Pulsing40PostingsFormat(); // } // }); // cfg.setMaxThreadStates(8); boolean create = !DirectoryReader.indexExists(dir); cfg.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); //wrap the writer with a tracking index writer writer = new TrackingIndexWriter(new IndexWriter(dir, cfg)); nrtManager = new NRTManager(writer, new SearcherFactory() { // @Override // public IndexSearcher newSearcher(IndexReader reader) throws IOException { // //TODO do some kind of warming here? 
// return new IndexSearcher(reader); // } }); getCurrentRTCache(latestGen); int priority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY); flushThread = new FlushThread("flush-thread"); flushThread.setPriority(priority); flushThread.setDaemon(true); flushThread.start(); reopenThread = new NRTManagerReopenThread(nrtManager, ordinaryWaiting, incomingSearchesMaximumWaiting); reopenThread.setName("NRT Reopen Thread"); reopenThread.setPriority(priority); reopenThread.setDaemon(true); reopenThread.start(); return this; } catch (Exception e) { throw new RuntimeException(e); } finally { indexUnlock(); } }
From source file:de.ks.flatadocdb.index.LuceneIndex.java
License:Apache License
public LuceneIndex(Repository repository, MetaModel metaModel, ExecutorService executorService) throws RuntimeException { super(repository, metaModel, executorService); try {// www . j a v a2 s . co m Path resolve = repository.getPath().resolve(LUCENE_INDEX_FOLDER); Files.createDirectories(resolve); TimeProfiler profiler = new TimeProfiler("Lucene loading").start(); try { this.directory = FSDirectory.open(resolve); analyzer = new StandardAnalyzer(); IndexWriterConfig cfg = new IndexWriterConfig(analyzer); cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexWriter = new IndexWriter(directory, cfg); reopenIndexReader(); } finally { profiler.stop().logDebug(log); } } catch (IOException e) { throw new RuntimeException(e); } }