List of usage examples for org.apache.lucene.index DirectoryReader indexExists
public static boolean indexExists(Directory directory) throws IOException
Returns: true if an index likely exists at the specified directory.
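Every snippet below follows the same check-then-create idiom: probe the directory with indexExists and, if no index is present yet, commit an empty IndexWriter so a reader can be opened. Here is a minimal, self-contained sketch of that idiom. The index path and analyzer are illustrative assumptions, and the sketch uses Lucene 5+ style constructors, while most project snippets below target the older 4.x API (IndexWriterConfig(Version, Analyzer)):

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexExistsSketch {
    public static void main(String[] args) throws IOException {
        // "/tmp/example-index" is an illustrative path, not taken from the examples below.
        try (Directory directory = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            if (!DirectoryReader.indexExists(directory)) {
                // No segments file found yet: commit an empty index so DirectoryReader.open() succeeds.
                new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer())).close();
            }
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                System.out.println("numDocs = " + reader.numDocs());
            }
        }
    }
}

The project examples that follow apply this same pattern to RAMDirectory, MMapDirectory, and HdfsDirectory instances with project-specific configuration.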
From source file:org.apache.blur.manager.indexserver.LocalIndexServer.java
License:Apache License
private Map<String, BlurIndex> openFromDisk() throws IOException {
    String table = _tableContext.getDescriptor().getName();
    Path tablePath = _tableContext.getTablePath();
    File tableFile = new File(tablePath.toUri());
    if (tableFile.isDirectory()) {
        Map<String, BlurIndex> shards = new ConcurrentHashMap<String, BlurIndex>();
        int shardCount = _tableContext.getDescriptor().getShardCount();
        for (int i = 0; i < shardCount; i++) {
            Directory directory;
            String shardName = ShardUtil.getShardName(BlurConstants.SHARD_PREFIX, i);
            if (_ramDir) {
                directory = new RAMDirectory();
            } else {
                File file = new File(tableFile, shardName);
                file.mkdirs();
                directory = new MMapDirectory(file);
            }
            if (!DirectoryReader.indexExists(directory)) {
                new IndexWriter(directory, new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer())).close();
            }
            shards.put(shardName, openIndex(table, shardName, directory));
        }
        return shards;
    }
    throw new IOException("Table [" + table + "] not found.");
}
From source file:org.apache.blur.manager.writer.BlurIndexSimpleWriter.java
License:Apache License
public BlurIndexSimpleWriter(ShardContext shardContext, Directory directory, SharedMergeScheduler mergeScheduler,
        final ExecutorService searchExecutor, BlurIndexCloser indexCloser, Timer indexImporterTimer,
        Timer bulkIndexingTimer, ThriftCache thriftCache) throws IOException {
    super(shardContext, directory, mergeScheduler, searchExecutor, indexCloser, indexImporterTimer,
            bulkIndexingTimer, thriftCache);
    _thriftCache = thriftCache;
    _commaSplitter = Splitter.on(',');
    _bulkWriters = new ConcurrentHashMap<String, BlurIndexSimpleWriter.BulkEntry>();
    _indexImporterTimer = indexImporterTimer;
    _bulkIndexingTimer = bulkIndexingTimer;
    _searchThreadPool = searchExecutor;
    _shardContext = shardContext;
    _tableContext = _shardContext.getTableContext();
    _context = _tableContext.getTable() + "/" + shardContext.getShard();
    _fieldManager = _tableContext.getFieldManager();
    _discoverableFields = _tableContext.getDiscoverableFields();
    _accessControlFactory = _tableContext.getAccessControlFactory();
    TableDescriptor descriptor = _tableContext.getDescriptor();
    Map<String, String> tableProperties = descriptor.getTableProperties();
    if (tableProperties != null) {
        String value = tableProperties.get(BlurConstants.BLUR_RECORD_SECURITY);
        if (value != null && value.equals(TRUE)) {
            LOG.info("Record Level Security has been enabled for table [{0}] shard [{1}]",
                    _tableContext.getTable(), _shardContext.getShard());
            _security = true;
        } else {
            _security = false;
        }
    } else {
        _security = false;
    }
    Analyzer analyzer = _fieldManager.getAnalyzerForIndex();
    _conf = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    _conf.setWriteLockTimeout(TimeUnit.MINUTES.toMillis(5));
    _conf.setCodec(new Blur024Codec(_tableContext.getBlurConfiguration()));
    _conf.setSimilarity(_tableContext.getSimilarity());
    _conf.setInfoStream(new LoggingInfoStream(_tableContext.getTable(), _shardContext.getShard()));
    TieredMergePolicy mergePolicy = (TieredMergePolicy) _conf.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);
    _conf.setMergeScheduler(mergeScheduler.getMergeScheduler());
    _snapshotIndexDeletionPolicy = new SnapshotIndexDeletionPolicy(_tableContext.getConfiguration(),
            SnapshotIndexDeletionPolicy.getGenerationsPath(_shardContext.getHdfsDirPath()));
    _policy = new IndexDeletionPolicyReader(_snapshotIndexDeletionPolicy);
    _conf.setIndexDeletionPolicy(_policy);
    BlurConfiguration blurConfiguration = _tableContext.getBlurConfiguration();
    _queue = new ArrayBlockingQueue<RowMutation>(
            blurConfiguration.getInt(BLUR_SHARD_QUEUE_MAX_INMEMORY_LENGTH, 100));
    _mutationQueueProcessor = new MutationQueueProcessor(_queue, this, _shardContext, _writesWaiting);
    // Bootstrap an empty index if none exists so the reader below can open.
    if (!DirectoryReader.indexExists(directory)) {
        new BlurIndexWriter(directory, _conf).close();
    }
    _directory = directory;
    _indexCloser = indexCloser;
    DirectoryReader realDirectoryReader = DirectoryReader.open(_directory);
    DirectoryReader wrappped = wrap(realDirectoryReader);
    String message = "BlurIndexSimpleWriter - initial open";
    DirectoryReader directoryReader = checkForMemoryLeaks(wrappped, message);
    _indexReader.set(directoryReader);
    openWriter();
    _watchForIdleBulkWriters = new TimerTask() {
        @Override
        public void run() {
            for (BulkEntry bulkEntry : _bulkWriters.values()) {
                bulkEntry._lock.lock();
                try {
                    if (!bulkEntry.isClosed() && bulkEntry.isIdle()) {
                        LOG.info("Bulk Entry [{0}] has become idle and now closing.", bulkEntry);
                        try {
                            bulkEntry.close();
                        } catch (IOException e) {
                            LOG.error("Unknown error while trying to close bulk writer when it became idle.", e);
                        }
                    }
                } finally {
                    bulkEntry._lock.unlock();
                }
            }
        }
    };
    long delay = TimeUnit.SECONDS.toMillis(30);
    _bulkIndexingTimer.schedule(_watchForIdleBulkWriters, delay, delay);
}
From source file:org.apache.blur.manager.writer.IndexImporter.java
License:Apache License
@Override
public void run() {
    // Only allow one import to occur in the process at a time.
    _globalLock.lock();
    try {
        if (_lastCleanup + _cleanupDelay < System.currentTimeMillis()) {
            try {
                cleanupOldDirs();
            } catch (IOException e) {
                LOG.error("Unknown error while trying to clean old directories on [{1}/{2}].", e, _shard, _table);
            }
            _lastCleanup = System.currentTimeMillis();
        }
        Path path = _shardContext.getHdfsDirPath();
        Configuration configuration = _shardContext.getTableContext().getConfiguration();
        try {
            FileSystem fileSystem = path.getFileSystem(configuration);
            SortedSet<FileStatus> listStatus;
            while (true) {
                try {
                    listStatus = sort(fileSystem.listStatus(path, new PathFilter() {
                        @Override
                        public boolean accept(Path path) {
                            if (path != null && path.getName().endsWith(COMMIT)) {
                                return true;
                            }
                            return false;
                        }
                    }));
                    break;
                } catch (FileNotFoundException e) {
                    LOG.warn("File not found error, retrying.");
                }
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    return;
                }
            }
            for (FileStatus fileStatus : listStatus) {
                Path file = fileStatus.getPath();
                if (fileStatus.isDir() && file.getName().endsWith(COMMIT)) {
                    // Rename to inuse; if the index is good, continue, else rename to badindex.
                    Path inuse = new Path(file.getParent(), rename(file.getName(), INUSE));
                    touch(fileSystem, new Path(file, INPROGRESS));
                    if (fileSystem.rename(file, inuse)) {
                        if (_testError != null) {
                            _testError.run();
                        }
                        HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, inuse);
                        try {
                            if (DirectoryReader.indexExists(hdfsDirectory)) {
                                IndexAction indexAction = getIndexAction(hdfsDirectory, fileSystem);
                                _blurIndex.process(indexAction);
                                return;
                            } else {
                                Path badindex = new Path(file.getParent(), rename(file.getName(), BADINDEX));
                                if (fileSystem.rename(inuse, badindex)) {
                                    LOG.error("Directory found at [{0}] is not a valid index, renaming to [{1}].",
                                            inuse, badindex);
                                } else {
                                    LOG.fatal("Directory found at [{0}] is not a valid index, could not rename to [{1}].",
                                            inuse, badindex);
                                }
                            }
                        } finally {
                            hdfsDirectory.close();
                        }
                    } else {
                        LOG.fatal("Could not rename [{0}] to inuse dir.", file);
                    }
                }
            }
        } catch (IOException e) {
            LOG.error("Unknown error while trying to refresh imports on [{1}/{2}].", e, _shard, _table);
        }
    } finally {
        _globalLock.unlock();
    }
}
From source file:org.apache.blur.manager.writer.MutatableActionTest.java
License:Apache License
private DirectoryReader getIndexReader(RAMDirectory directory) throws IOException {
    if (!DirectoryReader.indexExists(directory)) {
        new IndexWriter(directory, _conf.clone()).close();
    }
    return DirectoryReader.open(directory);
}
From source file:org.apache.blur.store.hdfs_v2.FastHdfsKeyValueDirectoryTest.java
License:Apache License
private int getDocumentCount(Directory directory) throws IOException {
    if (DirectoryReader.indexExists(directory)) {
        DirectoryReader reader = DirectoryReader.open(directory);
        int maxDoc = reader.maxDoc();
        reader.close();
        return maxDoc;
    }
    return 0;
}
From source file:org.apache.blur.store.hdfs_v2.FastHdfsKeyValueDirectoryTest.java
License:Apache License
private void assertFiles(Set<String> expected, int run, int commit, FastHdfsKeyValueDirectory directory)
        throws IOException {
    Set<String> actual;
    if (DirectoryReader.indexExists(directory)) {
        List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
        // assertEquals(1, listCommits.size());
        IndexCommit indexCommit = listCommits.get(0);
        actual = new TreeSet<String>(indexCommit.getFileNames());
    } else {
        actual = new TreeSet<String>();
    }
    Set<String> missing = new TreeSet<String>(expected);
    missing.removeAll(actual);
    Set<String> extra = new TreeSet<String>(actual);
    extra.removeAll(expected);
    assertEquals("Pass [" + run + "] Missing Files " + missing + " Extra Files " + extra, expected, actual);
}
From source file:org.apache.blur.thrift.TableAdmin.java
License:Apache License
private void validateIndexesExist(Path shardPath, FileSystem fileSystem, Configuration configuration)
        throws IOException {
    FileStatus[] listStatus = fileSystem.listStatus(shardPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(".commit");
        }
    });
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        HdfsDirectory directory = new HdfsDirectory(configuration, path);
        try {
            if (!DirectoryReader.indexExists(directory)) {
                throw new IOException("Path [" + path + "] is not a valid index.");
            }
        } finally {
            directory.close();
        }
    }
}
From source file:org.apache.maven.index.context.DefaultIndexingContext.java
License:Apache License
private void prepareIndex(boolean reclaimIndex) throws IOException, ExistingLuceneIndexMismatchException {
    if (DirectoryReader.indexExists(indexDirectory)) {
        try {
            // unlock the dir forcibly
            if (IndexWriter.isLocked(indexDirectory)) {
                unlockForcibly(lockFactory, indexDirectory);
            }
            openAndWarmup();
            checkAndUpdateIndexDescriptor(reclaimIndex);
        } catch (IOException e) {
            if (reclaimIndex) {
                prepareCleanIndex(true);
            } else {
                throw e;
            }
        }
    } else {
        prepareCleanIndex(false);
    }
    timestamp = IndexUtils.getTimestamp(indexDirectory);
}
From source file:org.apache.roller.weblogger.business.search.IndexManagerImpl.java
License:Apache License
private boolean indexExists() {
    try {
        return DirectoryReader.indexExists(getIndexDirectory());
    } catch (IOException e) {
        mLogger.error("Problem accessing index directory", e);
    }
    return false;
}
From source file:org.apache.tika.parser.geo.topic.GeoNameResolver.java
License:Apache License
/**
 * Search the corresponding GeoName for each location entity.
 *
 * @param locationNameEntities
 *            the location names recognized by the NER
 * @return HashMap in which each name has a list of resolved entities
 * @throws IOException
 * @throws RuntimeException
 */
public HashMap<String, ArrayList<String>> searchGeoName(ArrayList<String> locationNameEntities)
        throws IOException {
    if (locationNameEntities.size() == 0 || locationNameEntities.get(0).length() == 0)
        return new HashMap<String, ArrayList<String>>();
    Logger logger = Logger.getLogger(this.getClass().getName());
    if (!DirectoryReader.indexExists(indexDir)) {
        logger.log(Level.SEVERE, "No Lucene Index Directory Found, Invoke indexBuild() First !");
        System.exit(1);
    }
    IndexReader reader = DirectoryReader.open(indexDir);
    if (locationNameEntities.size() >= 200)
        hitsPerPage = 5; // avoid heavy computation
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = null;
    HashMap<String, ArrayList<ArrayList<String>>> allCandidates = new HashMap<String, ArrayList<ArrayList<String>>>();
    for (String name : locationNameEntities) {
        if (!allCandidates.containsKey(name)) {
            try {
                // q = new QueryParser("name", analyzer).parse(name);
                q = new MultiFieldQueryParser(new String[] { "name", "alternatenames" }, analyzer).parse(name);
                TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
                searcher.search(q, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                ArrayList<ArrayList<String>> topHits = new ArrayList<ArrayList<String>>();
                for (int i = 0; i < hits.length; ++i) {
                    ArrayList<String> tmp1 = new ArrayList<String>();
                    ArrayList<String> tmp2 = new ArrayList<String>();
                    int docId = hits[i].doc;
                    Document d;
                    try {
                        d = searcher.doc(docId);
                        tmp1.add(d.get("name"));
                        tmp1.add(d.get("longitude"));
                        tmp1.add(d.get("latitude"));
                        if (!d.get("alternatenames").equalsIgnoreCase(d.get("name"))) {
                            tmp2.add(d.get("alternatenames"));
                            tmp2.add(d.get("longitude"));
                            tmp2.add(d.get("latitude"));
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    topHits.add(tmp1);
                    if (tmp2.size() != 0)
                        topHits.add(tmp2);
                }
                allCandidates.put(name, topHits);
            } catch (org.apache.lucene.queryparser.classic.ParseException e) {
                e.printStackTrace();
            }
        }
    }
    HashMap<String, ArrayList<String>> resolvedEntities = new HashMap<String, ArrayList<String>>();
    pickBestCandidates(resolvedEntities, allCandidates);
    reader.close();
    return resolvedEntities;
}