List of usage examples for org.apache.lucene.index.IndexWriterConfig.setMaxBufferedDocs
@Override public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs)
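setMaxBufferedDocs sets how many documents IndexWriter buffers in RAM before flushing them to a new segment. The RAM-based trigger (setRAMBufferSizeMB) stays active alongside it, and whichever limit is hit first causes the flush; passing IndexWriterConfig.DISABLE_AUTO_FLUSH turns the document-count trigger off. Before the project examples below, a minimal self-contained sketch against the Version-free Lucene 5.x+ API (the index path and analyzer are illustrative; several examples below target older, Version-taking constructors):

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MaxBufferedDocsExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // illustrative path
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        // Flush once 1000 documents are buffered (or earlier, if the default
        // 16 MB RAM buffer fills up first).
        iwc.setMaxBufferedDocs(1000);
        // Alternative: flush on RAM consumption only.
        // iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        // iwc.setRAMBufferSizeMB(64);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // ... writer.addDocument(doc) calls go here ...
            writer.commit();
        }
    }
}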
From source file:dk.defxws.fgslucene.OperationsImpl.java
License:Open Source License
private void getIndexWriter(String indexName) throws GenericSearchException {
    if (iw == null) {
        Directory dir;
        try {
            dir = new SimpleFSDirectory(new File(config.getIndexDir(indexName)));
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
        IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_36, getQueryAnalyzer(indexName));
        // Only override Lucene's defaults for settings that are actually configured (> 0).
        int maxBufferedDocs = config.getMaxBufferedDocs(indexName);
        if (maxBufferedDocs > 0) {
            iwconfig.setMaxBufferedDocs(maxBufferedDocs);
        }
        int mergeFactor = config.getMergeFactor(indexName);
        if (mergeFactor > 0) {
            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(mergeFactor);
            iwconfig.setMergePolicy(ldmp);
        }
        long defaultWriteLockTimeout = config.getDefaultWriteLockTimeout(indexName);
        if (defaultWriteLockTimeout > 0) {
            IndexWriterConfig.setDefaultWriteLockTimeout(defaultWriteLockTimeout);
        }
        try {
            iw = new IndexWriter(dir, iwconfig);
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
    }
    try {
        docCount = iw.numDocs();
    } catch (Exception e) {
        closeIndexWriter(indexName);
        throw new GenericSearchException("IndexWriter numDocs error indexName=" + indexName + " :\n", e);
    }
    if (logger.isDebugEnabled())
        logger.debug("getIndexWriter indexName=" + indexName + " docCount=" + docCount);
}
From source file:edu.udel.ece.infolab.btc.Indexing.java
License:Apache License
/**
 * Create an index writer that uses a {@link TupleAnalyzer} on the triple fields,
 * with tokenization of the URI's localname, and the default WhitespaceAnalyzer
 * on the other fields.
 *
 * @param dir the directory where the index is stored
 * @return a configured {@link IndexWriter}
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private IndexWriter initializeIndexWriter(final Directory dir) throws IOException {
    final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
    final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
    final TupleAnalyzer tuple = new TupleAnalyzer(new StandardAnalyzer(Version.LUCENE_31));
    tuple.setURINormalisation(URINormalisation.LOCALNAME);
    fieldAnalyzers.put(OUTGOING_TRIPLE, tuple);
    fieldAnalyzers.put(INCOMING_TRIPLE, tuple);

    final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31,
            new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers));
    // Disable compound file
    ((LogMergePolicy) config.getMergePolicy()).setUseCompoundFile(false);
    // Increase merge factor to 20 - more adapted to batch creation
    ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(20);
    config.setRAMBufferSizeMB(256);
    config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    final IndexWriter writer = new IndexWriter(dir, config);
    writer.setMaxFieldLength(Integer.MAX_VALUE);
    return writer;
}
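The pattern above - a large RAM buffer combined with DISABLE_AUTO_FLUSH - is a common batch-loading setup: flushes are then governed purely by memory consumption rather than an arbitrary document count. A minimal sketch of just that configuration, transposed to the Version-free 5.x+ API (the example itself targets Lucene 3.1, where setMaxBufferedDeleteTerms and setMaxFieldLength still existed):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;

public class BatchIndexingConfig {
    static IndexWriterConfig newBatchConfig() {
        IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
        // Flush on RAM consumption only; the document-count trigger is disabled,
        // so large batches are bounded by memory rather than a fixed count.
        config.setRAMBufferSizeMB(256);
        config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        return config;
    }
}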
From source file:indexer.LuceneIndexer.java
/**
 * Indexes the files. This method checks the directories and then
 * finishes after the indexing is complete.
 * @param global Reference to the global class variables and methods.
 * @param createIndex If true, a new index is created from scratch and
 * the old index is destroyed.
 */
public static void IndexFiles(Global global, Boolean createIndex) {
    String dataDir = global.dataDir;
    String indexDir = global.indexDir;

    // Verify that the data directory exists
    if (dataDir == null) {
        System.err.println("Data directory is not accessible, unable to index files.");
    }

    // Verify that the data directory is readable
    final Path docDir = Paths.get(dataDir);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
    }

    startTime = new Date();
    try {
        System.out.println("Indexing to directory '" + indexDir + "'...");
        // Set up the analyzer
        Analyzer analyzer;
        try (Directory dir = FSDirectory.open(Paths.get(indexDir))) {
            analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (createIndex) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }
            iwc.setRAMBufferSizeMB(global.RAM_BUFFER_SIZE);
            iwc.setMaxBufferedDocs(global.MAX_BUFFERED_DOCS);
            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(global.MERGE_FACTOR);
            iwc.setMergePolicy(ldmp);
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                hm.clear();
                indexDocs(writer, docDir, global);
                // Merging is a costly operation, so it runs only when scheduled
                if (global.merge) {
                    System.out.println("Starting Merge");
                    writer.forceMerge(1);
                    global.merge = false;
                }
            }
            finishTime = new Date();
            long millis = finishTime.getTime() - startTime.getTime();
            totalTime = String.format("%02dhr %02dmin %02dsec",
                    TimeUnit.MILLISECONDS.toHours(millis),
                    TimeUnit.MILLISECONDS.toMinutes(millis)
                            - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(millis)),
                    TimeUnit.MILLISECONDS.toSeconds(millis)
                            - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)));
            System.out.println("");
            System.out.println("");
            System.out.println("Start Time: " + global.sdf.format(startTime.getTime()));
            System.out.println("Building List Time: " + listBuildTime);
            System.out.println("Indexing Time: " + indexingTime);
            System.out.println("Total Time: " + totalTime);
            System.out.println("Number of Documents: " + amountOfDocuments);
            System.out.println("Finish Time: " + global.sdf.format(finishTime.getTime()));
            System.out.println("");
        }
        analyzer.close();
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        log.fatal(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:io.druid.extension.lucene.LuceneDruidSegment.java
License:Apache License
private static IndexWriter buildRamWriter(RAMDirectory dir, Analyzer analyzer, int maxDocsPerSegment)
        throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // some arbitrary large numbers
    writerConfig.setMaxBufferedDocs(maxDocsPerSegment * 2);
    writerConfig.setRAMBufferSizeMB(5000);
    writerConfig.setUseCompoundFile(false);
    writerConfig.setCommitOnClose(true);
    writerConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    writerConfig.setMergeScheduler(NoMergeScheduler.INSTANCE);
    return new IndexWriter(dir, writerConfig);
}
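Note the inverted use here: setMaxBufferedDocs is not tuning the flush point but neutralizing it. Setting it to maxDocsPerSegment * 2 guarantees the document-count trigger never fires before the caller rolls to a new segment, and merging is switched off entirely. A hedged sketch of the same idea (class name, cap value, and analyzer are illustrative, not from the source):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NoMergeScheduler;
import org.apache.lucene.store.RAMDirectory;

public class ExternallyCappedWriter {
    // The per-segment cap is enforced by the caller; 100_000 is illustrative.
    static IndexWriter newRamWriter(int maxDocsPerSegment) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
        cfg.setMaxBufferedDocs(maxDocsPerSegment * 2); // count trigger can never fire first
        cfg.setRAMBufferSizeMB(5000);                  // RAM trigger effectively disabled too
        cfg.setMergePolicy(NoMergePolicy.INSTANCE);    // segments are rolled by the caller
        cfg.setMergeScheduler(NoMergeScheduler.INSTANCE);
        return new IndexWriter(new RAMDirectory(), cfg);
    }
}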
From source file:io.jpress.searcher.LuceneSearcher.java
License:LGPL
public IndexWriter createIndexWriter() throws IOException {
    if (mIndexFilePath == null) {
        throw new NullPointerException("please invoke init() method first!");
    }

    Analyzer analyzer = new JcsegAnalyzer5X(JcsegTaskConfig.COMPLEX_MODE);
    // Optional (only needed to change the default configuration):
    // obtain the segmentation task configuration.
    JcsegAnalyzer5X jcseg = (JcsegAnalyzer5X) analyzer;
    JcsegTaskConfig config = jcseg.getTaskConfig();
    // Append CJK synonyms to the token stream; requires jcseg.loadsyn=1
    // in jcseg.properties.
    config.setAppendCJKSyn(true);
    // Append pinyin to the token stream; requires jcseg.loadpinyin=1
    // in jcseg.properties. See com.webssky.jcseg.core.JcsegTaskConfig
    // for further options.
    config.setAppendCJKPinyin(true);

    Directory fsDirectory = FSDirectory.open(Paths.get(mIndexFilePath));
    IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer);
    indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexConfig.setMaxBufferedDocs(1000);
    IndexWriter indexWriter = new IndexWriter(fsDirectory, indexConfig);
    return indexWriter;
}
From source file:net.sf.logsaw.index.internal.ARunWithIndexWriter.java
License:Open Source License
/**
 * Opens a Lucene index writer, executes the callback method and then closes the writer.
 * @param log the log resource, may be <code>null</code>
 * @param analyzer the Lucene analyzer to set on the index writer
 * @param matchVersion the Lucene match version
 * @return any object or <code>null</code>
 * @throws CoreException if an <strong>expected</strong> error occurred
 */
protected final T runWithIndexWriter(ILogResource log, Analyzer analyzer, Version matchVersion)
        throws CoreException {
    logger.info("Opening index writer for '" + log.getName() + "'..."); //$NON-NLS-1$ //$NON-NLS-2$
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(IndexPlugin.getDefault().getIndexFile(log));
        LogMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMergeFactor(30);
        IndexWriterConfig cfg = new IndexWriterConfig(matchVersion, analyzer);
        cfg.setMaxBufferedDocs(1000);
        cfg.setMergePolicy(mp);
        writer = new IndexWriter(dir, cfg);
        try {
            return doRunWithIndexWriter(writer, log);
        } finally {
            logger.info("Closing index writer for '" + log.getName() + "'..."); //$NON-NLS-1$ //$NON-NLS-2$
            writer.close();
        }
    } catch (CoreException e) {
        // Rethrow original CoreException
        throw e;
    } catch (Exception e) {
        // Unexpected exception; wrap with CoreException
        throw new CoreException(new Status(IStatus.ERROR, IndexPlugin.PLUGIN_ID,
                NLS.bind(Messages.LuceneIndexService_error_failedToUpdateIndex,
                        new Object[] { log.getName(), e.getLocalizedMessage() }), e));
    }
}
From source file:org.ala.lucene.Autocompleter.java
License:Open Source License
@SuppressWarnings("unchecked") public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex) throws CorruptIndexException, IOException { // build a dictionary (from the spell package) IndexReader sourceReader = IndexReader.open(sourceDirectory); LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete); // code from/*ww w . j a v a 2 s. c om*/ // org.apache.lucene.search.spell.SpellChecker.indexDictionary( // Dictionary) IndexWriter.unlock(autoCompleteDirectory); // use a custom analyzer so we can do EdgeNGramFiltering IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() { protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result); result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result); result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new CharArraySet(SolrUtils.BIE_LUCENE_VERSION, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true)); result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20); return new TokenStreamComponents(src, result) { @Override protected void setReader(final Reader reader) throws IOException { super.setReader(reader); } }; } // public TokenStream tokenStream(String fieldName, Reader reader) { // TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); // // result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result); // result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result); // //result = new ISOLatin1AccentFilter(result); // result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS))); // result = new EdgeNGramTokenFilter(result, Side.FRONT,1, 20); // // return result; // } }); if (createNewIndex) { indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } indexWriterConfig.setMaxBufferedDocs(150); IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig); // writer.setMergeFactor(300); // go through every word, storing the original word (incl. n-grams) // and the number of times it occurs Map<String, Integer> wordsMap = new HashMap<String, Integer>(); Iterator<String> iter = (Iterator<String>) dict.getWordsIterator(); while (iter.hasNext()) { String word = iter.next(); int len = word.length(); if (len < 3) { continue; // too short we bail but "too long" is fine... 
} if (wordsMap.containsKey(word)) { throw new IllegalStateException("This should never happen in Lucene 2.3.2"); // wordsMap.put(word, wordsMap.get(word) + 1); } else { // use the number of documents this word appears in wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word))); } } for (String word : wordsMap.keySet()) { // ok index the word Document doc = new Document(); doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO, Field.Index.NOT_ANALYZED)); // count writer.addDocument(doc); } sourceReader.close(); // close writer writer.forceMerge(1); writer.close(); // re-open our reader reOpenReader(); }
From source file:org.apache.nutch.indexwriter.lucene.LuceneWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    this.fs = FileSystem.get(job);
    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt()));
    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    IndexWriterConfig indexWriterConfig =
            new IndexWriterConfig(Version.LUCENE_4_10_2, new SmartChineseAnalyzer());
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    mergePolicy.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE));
    indexWriterConfig.setMergePolicy(mergePolicy);
    indexWriterConfig.setUseCompoundFile(false);
    indexWriterConfig.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
    indexWriterConfig.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writer = new org.apache.lucene.index.IndexWriter(
            FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), indexWriterConfig);
    /*
     * addFieldOptions("title", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("url", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("content", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("lang", STORE.YES, INDEX.UNTOKENIZED, VECTOR.NO, job);
     */
    processOptions(job);
}
From source file:org.apache.solr.search.function.TestOrdValues.java
License:Apache License
protected static void createIndex(boolean doMultiSegment) throws Exception {
    if (VERBOSE) {
        System.out.println("TEST: setUp");
    }
    // prepare a small index with just a few documents.
    dir = newDirectory();
    anlzr = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy());
    if (doMultiSegment) {
        iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7));
    }
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
    int remaining = N_DOCS;
    boolean done[] = new boolean[N_DOCS];
    int i = 0;
    while (remaining > 0) {
        if (done[i]) {
            throw new Exception("to set this test correctly N_DOCS=" + N_DOCS
                    + " must be primary and greater than 2!");
        }
        addDoc(iw, i);
        done[i] = true;
        i = (i + 4) % N_DOCS;
        remaining--;
    }
    if (!doMultiSegment) {
        if (VERBOSE) {
            System.out.println("TEST: setUp full merge");
        }
        iw.forceMerge(1);
    }
    iw.close();
    if (VERBOSE) {
        System.out.println("TEST: setUp done close");
    }
}
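Here the knob is inverted yet again: a deliberately tiny, randomized maxBufferedDocs (2 to 7) forces frequent flushes, so even a handful of documents produces a multi-segment index for the test to exercise. A plain-Lucene sketch of the same trick, without the randomized test framework (class name, field name, and counts are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;

public class MultiSegmentDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setMaxBufferedDocs(2); // flush after every 2 docs -> many small segments
        try (IndexWriter w = new IndexWriter(dir, iwc)) {
            for (int i = 0; i < 10; i++) {
                Document d = new Document();
                d.add(new StringField("id", Integer.toString(i), Field.Store.YES));
                w.addDocument(d);
            }
        }
        try (DirectoryReader r = DirectoryReader.open(dir)) {
            System.out.println("segments: " + r.leaves().size()); // expect > 1
        }
    }
}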
From source file:org.apache.solr.update.SolrIndexConfig.java
License:Apache License
public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
    // so that we can update the analyzer on core reload, we pass null
    // for the default analyzer, and explicitly pass an analyzer on
    // appropriate calls to IndexWriter
    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, null);
    if (maxBufferedDocs != -1)
        iwc.setMaxBufferedDocs(maxBufferedDocs);
    if (ramBufferSizeMB != -1)
        iwc.setRAMBufferSizeMB(ramBufferSizeMB);
    if (termIndexInterval != -1)
        iwc.setTermIndexInterval(termIndexInterval);
    if (writeLockTimeout != -1)
        iwc.setWriteLockTimeout(writeLockTimeout);

    iwc.setSimilarity(schema.getSimilarity());
    iwc.setMergePolicy(buildMergePolicy(schema));
    iwc.setMergeScheduler(buildMergeScheduler(schema));
    iwc.setInfoStream(infoStream);

    // do this after buildMergePolicy since the backcompat logic
    // there may modify the effective useCompoundFile
    iwc.setUseCompoundFile(getUseCompoundFile());

    if (maxIndexingThreads != -1) {
        iwc.setMaxThreadStates(maxIndexingThreads);
    }

    if (mergedSegmentWarmerInfo != null) {
        // TODO: add infostream -> normal logging system (there is an issue somewhere)
        IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                IndexReaderWarmer.class, null, new Class[] { InfoStream.class },
                new Object[] { iwc.getInfoStream() });
        iwc.setMergedSegmentWarmer(warmer);
    }

    return iwc;
}