Usage examples for the org.apache.lucene.index.LogDocMergePolicy constructor:
public LogDocMergePolicy()
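Before the project examples, here is a minimal, self-contained sketch of typical usage: construct the policy with the no-argument constructor, tune it, and hand it to an IndexWriterConfig. This assumes Lucene 5+ style APIs; the index path and tuning values are illustrative, not taken from any of the projects below.

    // A minimal sketch, assuming Lucene 5+ APIs; path and values are illustrative.
    import java.nio.file.Paths;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogDocMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class LogDocMergePolicyExample {
        public static void main(String[] args) throws Exception {
            try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
                LogDocMergePolicy policy = new LogDocMergePolicy();
                policy.setMergeFactor(10);    // how many same-level segments merge at once
                policy.setMinMergeDocs(1000); // smaller segments are rounded up to this size
                IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
                config.setMergePolicy(policy);
                try (IndexWriter writer = new IndexWriter(dir, config)) {
                    // add documents here; merges are selected by document count,
                    // not by byte size as with LogByteSizeMergePolicy
                }
            }
        }
    }

Unlike TieredMergePolicy, the default in recent Lucene versions, LogDocMergePolicy selects merges purely by document count, which several of the examples below rely on to make merge behaviour predictable.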
From source file:edu.cmu.geolocator.io.GetWriter.java
License:Apache License
public static IndexWriter getIndexWriter(String indexdirectory, double buffersize) throws IOException {
    Directory dir;
    if (OSUtil.isWindows())
        dir = FSDirectory.open(new File(indexdirectory));
    else
        dir = NIOFSDirectory.open(new File(indexdirectory));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(buffersize);
    LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
    mergePolicy.setMergeFactor(3);
    config.setMergePolicy(mergePolicy);
    return new IndexWriter(dir, config);
}
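The merge factor of 3 used here is aggressive: LogMergePolicy's default is 10. A lower factor merges segments more often, paying more merge work at index time in exchange for fewer segments, and therefore cheaper searches, at query time.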
From source file:indexer.LuceneIndexer.java
/**
 * Indexes the files. This method checks the directories and then
 * finishes after the indexing is complete.
 * @param global reference to the global class variables and methods
 * @param createIndex if true a new index is created from scratch and
 *        the old index is destroyed
 */
public static void IndexFiles(Global global, Boolean createIndex) {
    String dataDir = global.dataDir;
    String indexDir = global.indexDir;
    // Verify that the data directory exists
    if (dataDir == null) {
        System.err.println("Data directory is not accessible, unable to index files.");
        return;
    }
    // Verify that the data directory is readable
    final Path docDir = Paths.get(dataDir);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
    }
    startTime = new Date();
    try {
        System.out.println("Indexing to directory '" + indexDir + "'...");
        // Set up the analyzer
        Analyzer analyzer;
        try (Directory dir = FSDirectory.open(Paths.get(indexDir))) {
            analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (createIndex) {
                // Create a new index in the directory, removing any
                // previously indexed documents
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                // Add new documents to an existing index
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }
            iwc.setRAMBufferSizeMB(global.RAM_BUFFER_SIZE);
            iwc.setMaxBufferedDocs(global.MAX_BUFFERED_DOCS);
            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(global.MERGE_FACTOR);
            iwc.setMergePolicy(ldmp);
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                hm.clear();
                indexDocs(writer, docDir, global);
                // This is a costly operation, so it only runs when scheduled
                if (global.merge) {
                    System.out.println("Starting Merge");
                    writer.forceMerge(1);
                    global.merge = false;
                }
            } // the writer is closed by try-with-resources
            finishTime = new Date();
            long millis = finishTime.getTime() - startTime.getTime();
            totalTime = String.format("%02dhr %02dmin %02dsec",
                    TimeUnit.MILLISECONDS.toHours(millis),
                    TimeUnit.MILLISECONDS.toMinutes(millis)
                            - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(millis)),
                    TimeUnit.MILLISECONDS.toSeconds(millis)
                            - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)));
            System.out.println("");
            System.out.println("");
            System.out.println("Start Time: " + global.sdf.format(startTime.getTime()));
            System.out.println("Building List Time: " + listBuildTime);
            System.out.println("Indexing Time: " + indexingTime);
            System.out.println("Total Time: " + totalTime);
            System.out.println("Number of Documents: " + amountOfDocuments);
            System.out.println("Finish Time: " + global.sdf.format(finishTime.getTime()));
            System.out.println("");
        }
        analyzer.close();
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        log.fatal(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
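Note that this configuration enables both flush triggers: a RAM buffer size and a maximum buffered-document count. When both are set, the in-memory buffer is flushed as soon as either threshold is crossed.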
From source file:org.alfresco.repo.search.impl.lucene.index.IndexInfo.java
License:Open Source License
/**
 * Construct an index in the given directory.
 *
 * @param indexDirectory File
 * @param config LuceneConfig
 */
private IndexInfo(File indexDirectory, LuceneConfig config) {
    super();
    initialiseTransitions();
    this.config = config;

    if (config != null) {
        this.readWriteLock = new ReentrantReadWriteLock(config.getFairLocking());
        this.maxFieldLength = config.getIndexerMaxFieldLength();
        this.threadPoolExecutor = config.getThreadPoolExecutor();
        IndexInfo.useNIOMemoryMapping = config.getUseNioMemoryMapping();
        this.maxDocsForInMemoryMerge = config.getMaxDocsForInMemoryMerge();
        this.maxRamInMbForInMemoryMerge = config.getMaxRamInMbForInMemoryMerge();
        this.maxDocsForInMemoryIndex = config.getMaxDocsForInMemoryIndex();
        this.maxRamInMbForInMemoryIndex = config.getMaxRamInMbForInMemoryIndex();
        this.writerMaxBufferedDocs = config.getWriterMaxBufferedDocs();
        this.writerRamBufferSizeMb = config.getWriterRamBufferSizeMb();
        this.writerMergeFactor = config.getWriterMergeFactor();
        this.writerMaxMergeDocs = config.getWriterMaxMergeDocs();
        this.mergerMaxBufferedDocs = config.getMergerMaxBufferedDocs();
        this.mergerRamBufferSizeMb = config.getMergerRamBufferSizeMb();
        this.mergerMergeFactor = config.getMergerMergeFactor();
        this.mergerMaxMergeDocs = config.getMergerMaxMergeDocs();
        this.termIndexInterval = config.getTermIndexInterval();
        this.mergerTargetOverlays = config.getMergerTargetOverlayCount();
        this.mergerTargetIndexes = config.getMergerTargetIndexCount();
        this.mergerTargetOverlaysBlockingFactor = config.getMergerTargetOverlaysBlockingFactor();
        // Work out the relative path of the index
        try {
            String indexRoot = new File(config.getIndexRootLocation()).getCanonicalPath();
            this.relativePath = indexDirectory.getCanonicalPath().substring(indexRoot.length() + 1);
        } catch (IOException e) {
            throw new AlfrescoRuntimeException("Failed to determine index relative path", e);
        }
    } else {
        this.readWriteLock = new ReentrantReadWriteLock(false);
        // Need a default thread pool ...
        TraceableThreadFactory threadFactory = new TraceableThreadFactory();
        threadFactory.setThreadDaemon(true);
        threadFactory.setThreadPriority(5);
        threadPoolExecutor = new ThreadPoolExecutor(10, 10, 90, TimeUnit.SECONDS,
                new LinkedBlockingQueue<Runnable>(), threadFactory,
                new ThreadPoolExecutor.CallerRunsPolicy());
        // Create a 'fake' relative path
        try {
            this.relativePath = indexDirectory.getCanonicalPath();
            int sepIndex = this.relativePath.indexOf(File.separator);
            if (sepIndex != -1) {
                if (this.relativePath.length() > sepIndex + 1) {
                    this.relativePath = this.relativePath.substring(sepIndex + 1);
                } else {
                    this.relativePath = "";
                }
            }
        } catch (IOException e) {
            throw new AlfrescoRuntimeException("Failed to determine index relative path", e);
        }
    }

    // Create an empty in-memory index
    IndexWriter writer;
    try {
        writer = new IndexWriter(emptyIndex, new AlfrescoStandardAnalyser(), true, MaxFieldLength.LIMITED);
        writer.setUseCompoundFile(writerUseCompoundFile);
        writer.setMaxBufferedDocs(writerMaxBufferedDocs);
        writer.setRAMBufferSizeMB(writerRamBufferSizeMb);
        writer.setMergeFactor(writerMergeFactor);
        writer.setMaxMergeDocs(writerMaxMergeDocs);
        writer.setWriteLockTimeout(writeLockTimeout);
        writer.setMaxFieldLength(maxFieldLength);
        writer.setTermIndexInterval(termIndexInterval);
        writer.setMergeScheduler(new SerialMergeScheduler());
        writer.setMergePolicy(new LogDocMergePolicy());
        writer.close();
    } catch (IOException e) {
        throw new IndexerException("Failed to create an empty in memory index!");
    }

    this.indexDirectory = indexDirectory;

    // Make sure the directory exists
    if (!this.indexDirectory.exists()) {
        if (!this.indexDirectory.mkdirs()) {
            throw new AlfrescoRuntimeException("Failed to create index directory");
        }
    }
    if (!this.indexDirectory.isDirectory()) {
        throw new AlfrescoRuntimeException("The index must be held in a directory");
    }

    // Create the info files
    File indexInfoFile = new File(this.indexDirectory, INDEX_INFO);
    File indexInfoBackupFile = new File(this.indexDirectory, INDEX_INFO_BACKUP);
    if (createFile(indexInfoFile) && createFile(indexInfoBackupFile)) {
        // If both files required creation this is a new index
        version = 0;
    }

    // Open the files and channels for the index info file and the backup
    this.indexInfoRAF = openFile(indexInfoFile);
    this.indexInfoChannel = this.indexInfoRAF.getChannel();
    this.indexInfoBackupRAF = openFile(indexInfoBackupFile);
    this.indexInfoBackupChannel = this.indexInfoBackupRAF.getChannel();

    // If the index found no info files (i.e. it is new), check if there is
    // an old style index and convert it.
    if (version == 0) {
        // Check if an old style index exists
        final File oldIndex = new File(this.indexDirectory, OLD_INDEX);
        if (IndexReader.indexExists(oldIndex)) {
            getWriteLock();
            try {
                doWithFileLock(new LockWork<Object>() {
                    public Object doWork() throws Exception {
                        IndexWriter writer;
                        try {
                            writer = new IndexWriter(oldIndex, new AlfrescoStandardAnalyser(), false,
                                    MaxFieldLength.LIMITED);
                            writer.setUseCompoundFile(writerUseCompoundFile);
                            writer.setMaxBufferedDocs(writerMaxBufferedDocs);
                            writer.setRAMBufferSizeMB(writerRamBufferSizeMb);
                            writer.setMergeFactor(writerMergeFactor);
                            writer.setMaxMergeDocs(writerMaxMergeDocs);
                            writer.setWriteLockTimeout(writeLockTimeout);
                            writer.setMaxFieldLength(maxFieldLength);
                            writer.setTermIndexInterval(termIndexInterval);
                            writer.setMergeScheduler(new SerialMergeScheduler());
                            writer.setMergePolicy(new LogDocMergePolicy());
                            writer.optimize();
                            long docs = writer.numDocs();
                            writer.close();

                            IndexEntry entry = new IndexEntry(IndexType.INDEX, OLD_INDEX, "",
                                    TransactionStatus.COMMITTED, "", docs, 0, false);
                            indexEntries.put(OLD_INDEX, entry);
                            writeStatus();

                            // The index exists and we should initialise the single reader
                            registerReferenceCountingIndexReader(entry.getName(),
                                    buildReferenceCountingIndexReader(entry.getName(), entry.getDocumentCount()));
                        } catch (IOException e) {
                            throw new IndexerException("Failed to optimise old index");
                        }
                        return null;
                    }

                    public boolean canRetry() {
                        return false;
                    }
                });
            } finally {
                releaseWriteLock();
            }
        }
    }
    // The index exists
    else if (version == -1) {
        getWriteLock();
        try {
            doWithFileLock(new LockWork<Object>() {
                public Object doWork() throws Exception {
                    setStatusFromFile();

                    // If the index is not shared we can do some easy clean up
                    if (!indexIsShared) {
                        HashSet<String> deletable = new HashSet<String>();
                        // Clean up
                        for (IndexEntry entry : indexEntries.values()) {
                            switch (entry.getStatus()) {
                            // States which can be deleted.
                            // We could check that prepared states can be committed.
                            case ACTIVE:
                            case MARKED_ROLLBACK:
                            case NO_TRANSACTION:
                            case PREPARING:
                            case ROLLEDBACK:
                            case ROLLINGBACK:
                            case MERGE_TARGET:
                            case UNKNOWN:
                            case PREPARED:
                            case DELETABLE:
                                if (s_logger.isInfoEnabled()) {
                                    s_logger.info("Deleting index entry " + entry);
                                }
                                entry.setStatus(TransactionStatus.DELETABLE);
                                deletable.add(entry.getName());
                                break;
                            // States which are in mid-transition which we can
                            // roll back to the committed state
                            case COMMITTED_DELETING:
                            case MERGE:
                                if (s_logger.isInfoEnabled()) {
                                    s_logger.info("Resetting merge to committed " + entry);
                                }
                                entry.setStatus(TransactionStatus.COMMITTED);
                                registerReferenceCountingIndexReader(entry.getName(),
                                        buildReferenceCountingIndexReader(entry.getName(),
                                                entry.getDocumentCount()));
                                break;
                            // Complete committing (which is post database commit)
                            case COMMITTING:
                                // Do the commit
                                if (s_logger.isInfoEnabled()) {
                                    s_logger.info("Committing " + entry);
                                }
                                entry.setStatus(TransactionStatus.COMMITTED);
                                registerReferenceCountingIndexReader(entry.getName(),
                                        buildReferenceCountingIndexReader(entry.getName(),
                                                entry.getDocumentCount()));
                                break;
                            // States that require no action
                            case COMMITTED:
                                registerReferenceCountingIndexReader(entry.getName(),
                                        buildReferenceCountingIndexReader(entry.getName(),
                                                entry.getDocumentCount()));
                                break;
                            default:
                                // Nothing to do
                                break;
                            }
                        }
                        // Delete entries that are not required
                        invalidateMainReadersFromFirst(deletable);
                        for (String id : deletable) {
                            indexEntries.remove(id);
                        }
                        clearOldReaders();
                        cleaner.schedule();
                        merger.schedule();
                        // Persist the new state
                        writeStatus();
                    }
                    return null;
                }

                public boolean canRetry() {
                    return false;
                }
            });
        } finally {
            releaseWriteLock();
        }
    }

    // Need to do this with the file lock - we must share info about other
    // readers to support this with a shared indexer implementation
    getWriteLock();
    try {
        LockWork<Object> work = new DeleteUnknownGuidDirectories();
        doWithFileLock(work);
    } finally {
        releaseWriteLock();
    }

    // Run the cleaner around every 20 seconds - this just makes the request
    // to the thread pool
    timer.schedule(new TimerTask() {
        @Override
        public void run() {
            cleaner.schedule();
        }
    }, 0, 20000);

    publishDiscoveryEvent();
}
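A pattern repeated throughout IndexInfo (and in makeDeltaIndexWriter below): each writer pairs LogDocMergePolicy with SerialMergeScheduler, so merges run synchronously on the indexing thread and are selected purely by document count, which keeps merge behaviour deterministic across the transactional index lifecycle.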
From source file:org.alfresco.repo.search.impl.lucene.index.IndexInfo.java
License:Open Source License
/**
 * Make a Lucene index writer.
 *
 * @param location File
 * @param analyzer Analyzer
 * @return IndexWriter
 * @throws IOException
 */
private IndexWriter makeDeltaIndexWriter(File location, Analyzer analyzer) throws IOException {
    IndexWriter writer;
    if (!IndexReader.indexExists(location)) {
        writer = new IndexWriter(location, analyzer, true, MaxFieldLength.LIMITED);
    } else {
        writer = new IndexWriter(location, analyzer, false, MaxFieldLength.LIMITED);
    }
    writer.setUseCompoundFile(writerUseCompoundFile);
    writer.setMaxBufferedDocs(writerMaxBufferedDocs);
    writer.setRAMBufferSizeMB(writerRamBufferSizeMb);
    writer.setMergeFactor(writerMergeFactor);
    writer.setMaxMergeDocs(writerMaxMergeDocs);
    writer.setWriteLockTimeout(writeLockTimeout);
    writer.setMaxFieldLength(maxFieldLength);
    writer.setTermIndexInterval(termIndexInterval);
    writer.setMergeScheduler(new SerialMergeScheduler());
    writer.setMergePolicy(new LogDocMergePolicy());
    return writer;
}
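The two Alfresco examples above use the pre-4.0 API, where tuning was done with setters on IndexWriter itself; those setters were removed in Lucene 4.0. Below is a rough sketch of the same configuration on the modern API. The field names mirror the Alfresco code above, and the surrounding class is assumed, not shown; this is a translation under those assumptions, not Alfresco's actual code.

    // Sketch only: a Lucene 5+ style translation of the legacy setter chain above.
    private IndexWriter makeDeltaIndexWriter(Path location, Analyzer analyzer) throws IOException {
        LogDocMergePolicy policy = new LogDocMergePolicy();
        policy.setMergeFactor(writerMergeFactor);   // field from the class above
        policy.setMaxMergeDocs(writerMaxMergeDocs); // field from the class above
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create if missing, else append
        config.setUseCompoundFile(writerUseCompoundFile);
        config.setMaxBufferedDocs(writerMaxBufferedDocs);
        config.setRAMBufferSizeMB(writerRamBufferSizeMb);
        config.setMergeScheduler(new SerialMergeScheduler());
        config.setMergePolicy(policy);
        // maxFieldLength and termIndexInterval have no direct equivalents on this
        // API; token counts can be capped with LimitTokenCountAnalyzer instead.
        return new IndexWriter(FSDirectory.open(location), config);
    }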
From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java
License:Apache License
private synchronized void removeProductDocument(Product product) throws CatalogException {
    try {
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        e.printStackTrace();
    }
    try {
        LOG.log(Level.FINE, "LuceneCatalog: remove document from index for product: ["
                + product.getProductId() + "]");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        IndexWriter writer = new IndexWriter(indexDir, config);
        writer.deleteDocuments(new Term("product_id", product.getProductId()));
        writer.close();
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Exception removing product: [" + product.getProductName()
                + "] from index: Message: " + e.getMessage());
        throw new CatalogException(e.getMessage(), e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
            }
        }
    }
}
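The delete-by-term call assumes product_id was indexed as a single untokenized term; an ID that is tokenized at index time would not match a raw Term like this. The toDoc method used elsewhere in this class (not shown here) is presumably responsible for indexing it as a keyword-style field.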
From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java
License:Apache License
private synchronized void addCompleteProductToIndex(CompleteProduct cp) throws CatalogException {
    IndexWriter writer = null;
    try {
        /* writer = new IndexWriter(indexFilePath, new StandardAnalyzer(), createIndex); */
        // writer.setCommitLockTimeout(this.commitLockTimeout * 1000);
        // writer.setWriteLockTimeout(this.writeLockTimeout * 1000);
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        writer = new IndexWriter(indexDir, config);
        Document doc = toDoc(cp.getProduct(), cp.getMetadata());
        writer.addDocument(doc);
        // TODO: determine a better way to optimize the index
    } catch (Exception e) {
        LOG.log(Level.WARNING, "Unable to index product: [" + cp.getProduct().getProductName()
                + "]: Message: " + e.getMessage(), e);
        throw new CatalogException("Unable to index product: [" + cp.getProduct().getProductName()
                + "]: Message: " + e.getMessage(), e);
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            System.out.println("failed: " + e.getLocalizedMessage());
        }
    }
}
From source file:org.apache.oodt.cas.filemgr.tools.OptimizeLuceneCatalog.java
License:Apache License
public void doOptimize() {
    IndexWriter writer = null;
    try {
        // Configure the merge policy before the writer is created;
        // setting it afterwards would have no effect on this writer.
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(this.mergeFactor);
        config.setMergePolicy(lmp);
        writer = new IndexWriter(reader.directory(), config);
        long timeBefore = System.currentTimeMillis();
        // TODO http://blog.trifork.com/2011/11/21/simon-says-optimize-is-bad-for-you/
        // writer.optimize();
        long timeAfter = System.currentTimeMillis();
        double numSeconds = ((timeAfter - timeBefore) * 1.0) / DOUBLE;
        LOG.log(Level.INFO, "LuceneCatalog: [" + this.catalogPath + "] optimized: took: ["
                + numSeconds + "] seconds");
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Unable to optimize lucene index: [" + catalogPath
                + "]: Message: " + e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception ignore) {
        }
    }
}
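The optimize() call is commented out because that method was deprecated and removed as of Lucene 4.0. If merging down is still wanted despite the caveats in the linked post, a sketch of the modern replacement, reusing the writer variable from above:

    // Post-4.0 replacement for optimize(): merge the index down to one segment.
    // This is expensive on large indexes; see the linked blog post first.
    writer.forceMerge(1);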
From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java
License:Apache License
private synchronized void removeWorkflowInstanceDocument(WorkflowInstance inst)
        throws InstanceRepositoryException {
    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(indexDir);
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        IndexWriter writer = new IndexWriter(indexDir, config);
        LOG.log(Level.FINE, "LuceneWorkflowEngine: remove document from index for workflow instance: ["
                + inst.getId() + "]");
        writer.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
        writer.close();
    } catch (IOException e) {
        LOG.log(Level.SEVERE, e.getMessage());
        LOG.log(Level.WARNING, "Exception removing workflow instance: [" + inst.getId()
                + "] from index: Message: " + e.getMessage());
        throw new InstanceRepositoryException(e.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
            }
        }
    }
}
From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java
License:Apache License
private synchronized void addWorkflowInstanceToCatalog(WorkflowInstance wInst)
        throws InstanceRepositoryException {
    IndexWriter writer = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);
        writer = new IndexWriter(indexDir, config);
        Document doc = toDoc(wInst);
        writer.addDocument(doc);
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Unable to index workflow instance: [" + wInst.getId()
                + "]: Message: " + e.getMessage());
        throw new InstanceRepositoryException("Unable to index workflow instance: ["
                + wInst.getId() + "]: Message: " + e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepositoryFactory.java
License:Apache License
public WorkflowInstanceRepository createInstanceRepository() {
    Directory indexDir = null;
    try {
        indexDir = FSDirectory.open(new File(indexFilePath).toPath());
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Create the index if it does not already exist
    IndexWriter writer = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        config.setMergePolicy(lmp);
        writer = new IndexWriter(indexDir, config);
    } catch (Exception e) {
        LOG.severe("Unable to create index: " + e.getMessage());
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                LOG.severe("Unable to close index: " + e.getMessage());
            }
        }
    }
    return new LuceneWorkflowInstanceRepository(indexFilePath, pageSize);
}
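Opening a CREATE_OR_APPEND writer and closing it immediately is a cheap way to guarantee the index exists on disk before the repository is handed to callers: a writer opened in this mode creates a new index when none is present, so later writers and readers can assume a valid index directory.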