List of usage examples for org.apache.lucene.index IndexWriterConfig setIndexDeletionPolicy
public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy)
From source file:cn.hbu.cs.esearch.index.DiskSearchIndex.java
License:Apache License
/** * Opens an index modifier.//from ww w . j ava2 s . com * @param analyzer Analyzer * @return IndexModifer instance */ @Override public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException { if (_indexWriter != null) { return _indexWriter; } Directory directory = _dirMgr.getDirectory(true); log.info("opening index writer at: " + _dirMgr.getPath()); EsearchMergePolicy mergePolicy = new EsearchMergePolicy(); mergePolicy.setMergePolicyParams(_mergePolicyParams); // hao: autocommit is set to false with this constructor IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); _deletionPolicy = new ZoieIndexDeletionPolicy(); config.setIndexDeletionPolicy(_deletionPolicy); config.setMergeScheduler(_mergeScheduler); config.setMergePolicy(mergePolicy); config.setReaderPooling(false); if (similarity != null) { config.setSimilarity(similarity); } config.setRAMBufferSizeMB(5); IndexWriter idxWriter = new IndexWriter(directory, config); // we need retrieve deletionPolicy from IndexWriter since deletionPolicy is deep cloned _deletionPolicy = (ZoieIndexDeletionPolicy) (idxWriter.getConfig().getIndexDeletionPolicy()); _indexWriter = idxWriter; return idxWriter; }
From source file:com.b2international.index.compat.SingleDirectoryIndexImpl.java
License:Apache License
protected void initLucene(final File indexDirectory, final boolean clean) { try {/*from w w w . j a v a2s . c o m*/ this.directory = Directories.openFile(indexDirectory.toPath()); final Analyzer analyzer = new ComponentTermAnalyzer(); final IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setOpenMode(clean ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND); config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(config.getIndexDeletionPolicy())); this.writer = new IndexWriter(directory, config); this.writer.commit(); // Create index if it didn't exist this.manager = new SearcherManager(directory, new SearchWarmerFactory()); } catch (final IOException e) { throw new RuntimeException(e.getMessage(), e); } }
From source file:com.mathworks.xzheng.admin.Fragments.java
License:Apache License
public void test() throws Exception { Directory dir = null;/* w w w. j a v a2s. c om*/ Analyzer analyzer = null; // START IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy(); SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(policy); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setIndexDeletionPolicy(snapshotter); IndexWriter writer = new IndexWriter(dir, config); // END IndexCommit commit = null; try { commit = (IndexCommit) snapshotter.snapshot(); Collection<String> fileNames = commit.getFileNames(); /*<iterate over & copy files from fileNames>*/ } finally { snapshotter.release(commit); } }
From source file:com.qwazr.search.bench.LuceneCommonIndex.java
License:Apache License
LuceneCommonIndex(final Path rootDirectory, final String schemaName, final String indexName, final double ramBufferSize, final boolean useCompoundFile) throws IOException { final Path schemaDirectory = Files.createDirectory(rootDirectory.resolve(schemaName)); this.indexDirectory = Files.createDirectory(schemaDirectory.resolve(indexName)); this.luceneDirectory = indexDirectory.resolve("data"); this.dataDirectory = FSDirectory.open(luceneDirectory); final IndexWriterConfig indexWriterConfig = new IndexWriterConfig( new PerFieldAnalyzerWrapper(new StandardAnalyzer())); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexWriterConfig.setRAMBufferSizeMB(ramBufferSize); final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); mergeScheduler.setMaxMergesAndThreads(MAX_SSD_MERGE_THREADS, MAX_SSD_MERGE_THREADS); indexWriterConfig.setMergeScheduler(mergeScheduler); indexWriterConfig.setUseCompoundFile(useCompoundFile); final TieredMergePolicy mergePolicy = new TieredMergePolicy(); indexWriterConfig.setMergePolicy(mergePolicy); // We use snapshots deletion policy final SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy( indexWriterConfig.getIndexDeletionPolicy()); indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy); this.indexWriter = new IndexWriter(this.dataDirectory, indexWriterConfig); this.localReplicator = new LocalReplicator(); }
From source file:com.qwazr.search.index.IndexInstance.java
License:Apache License
/** * @param schema/*from ww w . j a v a2 s . c om*/ * @param indexDirectory * @return */ final static IndexInstance newInstance(SchemaInstance schema, File indexDirectory, IndexSettingsDefinition settings) throws ServerException, IOException, ReflectiveOperationException, InterruptedException { UpdatableAnalyzer indexAnalyzer = null; UpdatableAnalyzer queryAnalyzer = null; IndexWriter indexWriter = null; Directory dataDirectory = null; try { if (!indexDirectory.exists()) indexDirectory.mkdir(); if (!indexDirectory.isDirectory()) throw new IOException( "This name is not valid. No directory exists for this location: " + indexDirectory); FileSet fileSet = new FileSet(indexDirectory); //Loading the settings if (settings == null) { settings = fileSet.settingsFile.exists() ? JsonMapper.MAPPER.readValue(fileSet.settingsFile, IndexSettingsDefinition.class) : IndexSettingsDefinition.EMPTY; } else { JsonMapper.MAPPER.writeValue(fileSet.settingsFile, settings); } //Loading the fields File fieldMapFile = new File(indexDirectory, FIELDS_FILE); LinkedHashMap<String, FieldDefinition> fieldMap = fieldMapFile.exists() ? JsonMapper.MAPPER.readValue(fieldMapFile, FieldDefinition.MapStringFieldTypeRef) : new LinkedHashMap<>(); //Loading the fields File analyzerMapFile = new File(indexDirectory, ANALYZERS_FILE); LinkedHashMap<String, AnalyzerDefinition> analyzerMap = analyzerMapFile.exists() ? 
JsonMapper.MAPPER.readValue(analyzerMapFile, AnalyzerDefinition.MapStringAnalyzerTypeRef) : new LinkedHashMap<>(); AnalyzerContext context = new AnalyzerContext(analyzerMap, fieldMap); indexAnalyzer = new UpdatableAnalyzer(context, context.indexAnalyzerMap); queryAnalyzer = new UpdatableAnalyzer(context, context.queryAnalyzerMap); // Open and lock the data directory dataDirectory = FSDirectory.open(fileSet.dataDirectory.toPath()); // Set IndexWriterConfig indexWriterConfig = new IndexWriterConfig(indexAnalyzer); if (settings != null && settings.similarity_class != null) indexWriterConfig.setSimilarity(IndexUtils.findSimilarity(settings.similarity_class)); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy( indexWriterConfig.getIndexDeletionPolicy()); indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy); indexWriter = new IndexWriter(dataDirectory, indexWriterConfig); if (indexWriter.hasUncommittedChanges()) indexWriter.commit(); // Finally we build the SearchSearcherManger SearcherManager searcherManager = new SearcherManager(indexWriter, true, null); return new IndexInstance(schema, dataDirectory, settings, analyzerMap, fieldMap, fileSet, indexWriter, searcherManager, queryAnalyzer); } catch (IOException | ServerException | ReflectiveOperationException | InterruptedException e) { // We failed in opening the index. We close everything we can if (queryAnalyzer != null) IOUtils.closeQuietly(queryAnalyzer); if (indexAnalyzer != null) IOUtils.closeQuietly(indexAnalyzer); if (indexWriter != null) IOUtils.closeQuietly(indexWriter); if (dataDirectory != null) IOUtils.closeQuietly(dataDirectory); throw e; } }
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception { Directory dir = MMapDirectory.open(directory.toPath()); Analyzer analyzer = new SimpleAnalyzer(); // Upgrade the index in place if necessary. if (doUpgrade && DirectoryReader.indexExists(dir)) { upgradeIndex(dir);// ww w. ja v a 2 s. c om } IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { // give half to the index, the other half we keep for service caching context totalMBs = Math.max(1, totalMBs / 2); iwc.setRAMBufferSizeMB(totalMBs); } this.writer = new IndexWriter(dir, iwc); this.writer.commit(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros; return this.writer; }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception { Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { long cacheSizeMB = (totalMBs * 3) / 4; cacheSizeMB = Math.max(1, cacheSizeMB); iwc.setRAMBufferSizeMB(cacheSizeMB); this.linkAccessMemoryLimitMB = totalMBs / 4; }//from w w w . jav a 2s . c o m Directory dir = MMapDirectory.open(directory.toPath()); // Upgrade the index in place if necessary. if (doUpgrade && DirectoryReader.indexExists(dir)) { upgradeIndex(dir); } iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); IndexWriter w = new IndexWriter(dir, iwc); w.commit(); synchronized (this.searchSync) { this.writer = w; this.linkAccessTimes.clear(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros; } return this.writer; }
From source file:com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm.java
License:Apache License
/**
 * Creates the index writer on {@code dir} and the taxonomy writer on {@code taxoDir}.
 *
 * <p>The index writer is configured with no analyzer and a deletion policy that keeps
 * only the last commit.
 *
 * @throws IOException if either writer cannot be opened
 */
private void createWriter() throws IOException {
    writer = new IndexWriter(dir,
            new IndexWriterConfig(Version.LATEST, null)
                    .setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));
    taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.ShardWriter.java
License:Apache License
/** * Constructor/*from ww w. ja v a 2s . c om*/ * @param fs * @param shard * @param tempDir * @param conf * @throws IOException */ public ShardWriter(FileSystem fs, Shard shard, String tempDir, Configuration conf) throws IOException { logger.info("Construct a shard writer"); this.conf = conf; this.fs = fs; localFs = FileSystem.getLocal(conf); perm = new Path(shard.getDirectory()); taxoPerm = new Path(shard.getDirectory() + ".taxonomy"); String indexDir = tempDir + "/" + "index"; String taxoDir = tempDir + "/" + "taxo"; temp = new Path(indexDir); taxoTemp = new Path(taxoDir); if (localFs.exists(temp)) { File tempFile = new File(temp.getName()); if (tempFile.exists()) { LindenReducer.deleteDir(tempFile); } } if (!fs.exists(perm)) { fs.mkdirs(perm); } else { moveToTrash(conf, perm); fs.mkdirs(perm); } if (!fs.exists(taxoPerm)) { fs.mkdirs(taxoPerm); } else { moveToTrash(conf, taxoPerm); fs.mkdirs(taxoPerm); } IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, null); config.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); writer = new IndexWriter(FSDirectory.open(new File(indexDir)), config); taxoWriter = new DirectoryTaxonomyWriter(FSDirectory.open(new File(taxoDir))); }
From source file:io.anserini.IndexerCW09B.java
License:Apache License
public int indexWithThreads(int numThreads) throws IOException, InterruptedException { System.out.println(//from w w w .j av a 2 s . c o m "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'..."); final Directory dir = FSDirectory.open(indexPath); final IndexWriterConfig iwc = new IndexWriterConfig(analyzer()); iwc.setSimilarity(new BM25Similarity()); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); iwc.setUseCompoundFile(false); iwc.setMergeScheduler(new ConcurrentMergeScheduler()); final IndexWriter writer = new IndexWriter(dir, iwc); final ExecutorService executor = Executors.newFixedThreadPool(numThreads); for (Path f : discoverWarcFiles(docDir)) executor.execute(new IndexerThread(writer, f)); //add some delay to let some threads spawn by scheduler Thread.sleep(30000); executor.shutdown(); // Disable new tasks from being submitted try { // Wait for existing tasks to terminate while (!executor.awaitTermination(5, TimeUnit.MINUTES)) { Thread.sleep(1000); } } catch (InterruptedException ie) { // (Re-)Cancel if current thread also interrupted executor.shutdownNow(); // Preserve interrupt status Thread.currentThread().interrupt(); } int numIndexed = writer.maxDoc(); try { writer.commit(); } finally { writer.close(); } return numIndexed; }