List of usage examples for org.apache.lucene.index IndexWriterConfig getMergePolicy
@Override
public MergePolicy getMergePolicy()
From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java
License:Apache License
protected void cleanupFromRebuild(Context context) throws IOException, InterruptedException { _writer.commit();// w w w. j av a2s .c o m _writer.close(); IndexReader reader = IndexReader.open(_directory); TableDescriptor descriptor = _blurTask.getTableDescriptor(); Path directoryPath = _blurTask.getDirectoryPath(context); remove(directoryPath); NoLockFactory lockFactory = NoLockFactory.getNoLockFactory(); Directory destDirectory = getDestDirectory(descriptor, directoryPath); destDirectory.setLockFactory(lockFactory); boolean optimize = _blurTask.getOptimize(); if (optimize) { context.setStatus("Starting Copy-Optimize Phase"); IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, _analyzer); TieredMergePolicy policy = (TieredMergePolicy) conf.getMergePolicy(); policy.setUseCompoundFile(false); long s = System.currentTimeMillis(); IndexWriter writer = new IndexWriter(getBiggerBuffers(destDirectory), conf); writer.addIndexes(reader); writer.close(); long e = System.currentTimeMillis(); context.setStatus("Copying phase took [" + (e - s) + " ms]"); LOG.info("Copying phase took [" + (e - s) + " ms]"); } else { context.setStatus("Starting Copy-Optimize Phase"); long s = System.currentTimeMillis(); List<String> files = getFilesOrderedBySize(_directory); long totalBytesToCopy = getTotalBytes(_directory); long totalBytesCopied = 0; long startTime = System.currentTimeMillis(); for (String file : files) { totalBytesCopied += copy(_directory, destDirectory, file, file, context, totalBytesCopied, totalBytesToCopy, startTime); } long e = System.currentTimeMillis(); context.setStatus("Copying phase took [" + (e - s) + " ms]"); LOG.info("Copying phase took [" + (e - s) + " ms]"); } }
From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java
License:Apache License
/**
 * Creates the reducer's IndexWriter on {@code _directory}, configured with
 * the task analyzer, a FairSimilarity, the task's RAM buffer size, and
 * compound files disabled on the default TieredMergePolicy.
 *
 * @param context the MapReduce task context (not used here).
 * @throws IOException if the writer cannot be created.
 */
protected void setupWriter(Context context) throws IOException {
    nullCheck(_directory);
    nullCheck(_analyzer);

    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, _analyzer);
    config.setSimilarity(new FairSimilarity());
    config.setRAMBufferSizeMB(_blurTask.getRamBufferSizeMB());

    // The default merge policy is a TieredMergePolicy; switch off compound files.
    TieredMergePolicy mergePolicy = (TieredMergePolicy) config.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);

    _writer = new IndexWriter(_directory, config);
}
From source file:com.stratio.cassandra.index.LuceneIndex.java
License:Apache License
/** * Initializes this using the specified {@link Sort} for trying to keep the {@link Document}s sorted. * * @param sort The {@link Sort} to be used. *///from w w w. j a v a2 s . com public void init(Sort sort) { Log.debug("Initializing index"); try { this.sort = sort; // Get directory file file = new File(path); // Open or create directory FSDirectory fsDirectory = FSDirectory.open(file); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer); config.setRAMBufferSizeMB(ramBufferMB); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setUseCompoundFile(true); config.setMergePolicy(new SortingMergePolicy(config.getMergePolicy(), sort)); indexWriter = new IndexWriter(directory, config); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { public IndexSearcher newSearcher(IndexReader reader) throws IOException { IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter); searcherManager = new SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds); searcherReopener.start(); // Start the refresher thread } catch (IOException e) { Log.error(e, "Error while initializing index"); throw new RuntimeException(e); } }
From source file:edu.udel.ece.infolab.btc.Indexing.java
License:Apache License
/** * Create a index writer that uses a #TupleAnalyzer on the triples fields with * a tokenization of the URI's localname, and the default #WhitespaceAnalyzer * on the others.//from w w w . j a va2 s . c o m * @param dir * @return * @throws IOException */ @SuppressWarnings("deprecation") private IndexWriter initializeIndexWriter(final Directory dir) throws IOException { final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_31); final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>(); final TupleAnalyzer tuple = new TupleAnalyzer(new StandardAnalyzer(Version.LUCENE_31)); tuple.setURINormalisation(URINormalisation.LOCALNAME); fieldAnalyzers.put(OUTGOING_TRIPLE, tuple); fieldAnalyzers.put(INCOMING_TRIPLE, tuple); final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers)); // Disable compound file ((LogMergePolicy) config.getMergePolicy()).setUseCompoundFile(false); // Increase merge factor to 20 - more adapted to batch creation ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(20); config.setRAMBufferSizeMB(256); config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); final IndexWriter writer = new IndexWriter(dir, config); writer.setMaxFieldLength(Integer.MAX_VALUE); return writer; }
From source file:nl.inl.util.Utilities.java
License:Apache License
public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer); config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND); config.setRAMBufferSizeMB(150); // faster indexing // Set merge factor (if using LogMergePolicy, which is the default up to version LUCENE_32, // so yes)/* www .j av a 2s. c o m*/ MergePolicy mp = config.getMergePolicy(); if (mp instanceof LogMergePolicy) { ((LogMergePolicy) mp).setMergeFactor(40); // faster indexing } return config; }
From source file:org.apache.blur.manager.writer.IndexImporterTest.java
License:Apache License
private void setupWriter(Configuration configuration) throws IOException { TableDescriptor tableDescriptor = new TableDescriptor(); tableDescriptor.setName("test-table"); String uuid = UUID.randomUUID().toString(); tableDescriptor.setTableUri(new Path(_base, "table-table").toUri().toString()); tableDescriptor.setShardCount(2);//from w ww .java 2 s. co m TableContext tableContext = TableContext.create(tableDescriptor); ShardContext shardContext = ShardContext.create(tableContext, "shard-00000000"); Path tablePath = new Path(_base, "table-table"); _shardPath = new Path(tablePath, "shard-00000000"); String indexDirName = "index_" + uuid; _path = new Path(_shardPath, indexDirName + ".commit"); _fileSystem.mkdirs(_path); _badRowIdsPath = new Path(_shardPath, indexDirName + ".badrowids"); _badIndexPath = new Path(_shardPath, indexDirName + ".badindex"); _inUsePath = new Path(_shardPath, indexDirName + ".inuse"); Directory commitDirectory = new HdfsDirectory(configuration, _path); _mainDirectory = new HdfsDirectory(configuration, _shardPath); _fieldManager = tableContext.getFieldManager(); Analyzer analyzerForIndex = _fieldManager.getAnalyzerForIndex(); IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, analyzerForIndex); // conf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy(); mergePolicy.setUseCompoundFile(false); _commitWriter = new IndexWriter(commitDirectory, conf.clone()); // Make sure there's an empty index... new IndexWriter(_mainDirectory, conf.clone()).close(); _mainWriter = new IndexWriter(_mainDirectory, conf.clone()); BufferStore.initNewBuffer(128, 128 * 128); _indexImporter = new IndexImporter(_timer, getBlurIndex(shardContext, _mainDirectory), shardContext, TimeUnit.MINUTES, 10, null); }
From source file:org.apache.blur.store.hdfs_v2.FastHdfsKeyValueDirectoryTest.java
License:Apache License
@Test public void testMulipleCommitsAndReopens() throws IOException { IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer()); conf.setMergeScheduler(new SerialMergeScheduler()); TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy(); mergePolicy.setUseCompoundFile(false); Set<String> fileSet = new TreeSet<String>(); long seed = new Random().nextLong(); System.out.println("Seed:" + seed); Random random = new Random(seed); int docCount = 0; int passes = 10; byte[] segmentsGenContents = null; for (int run = 0; run < passes; run++) { final FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration, new Path(_path, "test_multiple_commits_reopens")); if (segmentsGenContents != null) { byte[] segmentsGenContentsCurrent = readSegmentsGen(directory); assertTrue(Arrays.equals(segmentsGenContents, segmentsGenContentsCurrent)); }//from www. j av a 2 s .co m assertFiles(fileSet, run, -1, directory); assertEquals(docCount, getDocumentCount(directory)); IndexWriter writer = new IndexWriter(directory, conf.clone()); int numberOfCommits = random.nextInt(100); for (int i = 0; i < numberOfCommits; i++) { assertFiles(fileSet, run, i, directory); addDocuments(writer, random.nextInt(100)); // Before Commit writer.commit(); // After Commit // Set files after commit { fileSet.clear(); List<IndexCommit> listCommits = DirectoryReader.listCommits(directory); assertEquals(1, listCommits.size()); IndexCommit indexCommit = listCommits.get(0); fileSet.addAll(indexCommit.getFileNames()); } segmentsGenContents = readSegmentsGen(directory); } docCount = getDocumentCount(directory); } }
From source file:org.apache.blur.utils.TableShardCountCollapserTest.java
License:Apache License
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount) throws IOException { HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer()); TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy(); mergePolicy.setUseCompoundFile(false); IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf); Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>(); int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount); assertEquals(i, partition);//w ww . j av a 2 s . c o m Document doc = getDoc(i); indexWriter.addDocument(doc); indexWriter.close(); }
From source file:org.apache.solr.core.TestConfig.java
License:Apache License
/**
 * Checks the index defaults produced by solrconfig-defaults.xml: 100 MB RAM
 * buffer, native lock type, compound files off, and an IndexWriterConfig
 * carrying a TieredMergePolicy plus a ConcurrentMergeScheduler.
 */
@Test
public void testDefaults() throws Exception {
    SolrConfig sc = new SolrConfig(new SolrResourceLoader("solr/collection1"), "solrconfig-defaults.xml", null);
    SolrIndexConfig sic = sc.indexConfig;

    assertEquals("default ramBufferSizeMB", 100.0D, sic.ramBufferSizeMB, 0.0D);
    assertEquals("default LockType", SolrIndexConfig.LOCK_TYPE_NATIVE, sic.lockType);
    assertEquals("default useCompoundFile", false, sic.useCompoundFile);

    IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
    IndexWriterConfig iwc = sic.toIndexWriterConfig(indexSchema);

    assertNotNull("null mp", iwc.getMergePolicy());
    assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);

    assertNotNull("null ms", iwc.getMergeScheduler());
    assertTrue("ms is not CMS", iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler);
}
From source file:org.apache.solr.core.TestMergePolicyConfig.java
License:Apache License
public void testDefaultMergePolicyConfig() throws Exception { initCore("solrconfig-mergepolicy-defaults.xml", "schema-minimal.xml"); IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema()); assertEquals(false, iwc.getUseCompoundFile()); TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class, iwc.getMergePolicy()); assertEquals(0.0D, tieredMP.getNoCFSRatio(), 0.0D); assertCommitSomeNewDocs();//from w ww .j a v a 2 s.co m assertCompoundSegments(h.getCore(), false); }