List of usage examples for the org.apache.hadoop.fs.FileSystem.exists method
public boolean exists(Path f) throws IOException
From source file:com.indeed.imhotep.builder.tsv.KerberosUtils.java
License:Apache License
/** * Use for testing keytab logins/*from ww w. jav a 2s .c o m*/ */ public static void main(String[] args) throws Exception { KerberosUtils.loginFromKeytab(new BaseConfiguration()); final FileSystem fileSystem = FileSystem.get(new org.apache.hadoop.conf.Configuration()); final Path path = new Path("/CLUSTERNAME"); if (fileSystem.exists(path)) { System.out.println(CharStreams.toString(new InputStreamReader(fileSystem.open(path), Charsets.UTF_8))); } }
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
private void checkPathExists(Path path) { boolean exists; FileSystem fs; Path qualifiedPath = path;//from ww w.j a v a 2s . c om try { fs = getFS(path); qualifiedPath = path.makeQualified(fs); exists = fs.exists(path); } catch (Exception e) { exists = false; } if (!exists) { throw new RuntimeException("The provided path doesn't exist " + qualifiedPath.toString() + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv" + "\nFor local files use 'file://' prefix like file:/tmp/file.tsv"); } }
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
/** * * @return true if upload succeeded//from w w w .j a va 2 s. co m */ private static boolean uploadShard(String localShardDir, String shardName, String indexName, Path finalIndexPath, FileSystem finalFS, boolean qaMode) { final Path finalIndexDirPath = new Path(finalIndexPath, indexName); final Path finalShardPath = new Path(finalIndexDirPath, shardName + ".sqar"); try { if (!finalFS.exists(finalIndexDirPath)) { finalFS.mkdirs(finalIndexDirPath); if (qaMode) { makeWorldWritable(finalFS, finalIndexDirPath); } } if (finalFS.exists(finalShardPath)) { log.info("File already exists. HDFS upload aborted."); return true; } final String scheme = finalFS.getUri().getScheme(); if (scheme.equals("hdfs")) { /* * upload to temp file then rename, * to avoid having other systems see a partial file */ final String tmpUploadShardName = indexName + "-" + shardName; final Path tempUploadPath = new Path(new Path("/tmp/"), tmpUploadShardName + ".sqar"); final File shardDir = new File(localShardDir, shardName); final SquallArchiveWriter writer = new SquallArchiveWriter(finalFS, tempUploadPath, true, SquallArchiveCompressor.GZIP); writer.batchAppendDirectory(shardDir); writer.commit(); finalFS.rename(tempUploadPath, finalShardPath); } else if (scheme.equals("s3n")) { /* * s3 files are only visible after the upload is complete, * so no need to use a temp file */ final File shardDir = new File(localShardDir, shardName); final SquallArchiveWriter writer = new SquallArchiveWriter(finalFS, finalShardPath, true, SquallArchiveCompressor.GZIP); writer.batchAppendDirectory(shardDir); writer.commit(); } } catch (IOException e) { log.error(e); return false; } if (qaMode) { try { // try to set permissions on the uploaded file makeWorldWritable(finalFS, finalShardPath); } catch (Exception e) { log.warn("Failed to set permissions on the uploaded file " + finalShardPath); } } return true; }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
protected void publishMissingPaths(FileSystem fs, String destDir, long commitTime, String categoryName) throws Exception { Long prevRuntime = new Long(-1); if (!prevRuntimeForCategory.containsKey(categoryName)) { LOG.debug("Calculating Previous Runtime from Directory Listing"); prevRuntime = getPreviousRuntime(fs, destDir, categoryName); } else {//from w w w . j av a 2s . c o m LOG.debug("Reading Previous Runtime from Cache"); prevRuntime = prevRuntimeForCategory.get(categoryName); } if (prevRuntime != -1) { if (isMissingPaths(commitTime, prevRuntime)) { LOG.debug("Previous Runtime: [" + getLogDateString(prevRuntime) + "]"); Set<Path> pathsToBeRegistered = null; String tableName = null; if (isStreamHCatEnabled(categoryName)) { tableName = getTableName(categoryName); pathsToBeRegistered = pathsToBeregisteredPerTable.get(tableName); } while (isMissingPaths(commitTime, prevRuntime)) { String missingPath = Cluster.getDestDir(destDir, categoryName, prevRuntime); Path missingDir = new Path(missingPath); if (!fs.exists(missingDir)) { LOG.info("Creating Missing Directory [" + missingDir + "]"); fs.mkdirs(missingDir); if (isStreamHCatEnabled(categoryName)) { synchronized (pathsToBeRegistered) { pathsToBeRegistered.add(missingDir); } } ConduitMetrics.updateSWGuage(getServiceType(), EMPTYDIR_CREATE, categoryName, 1); } prevRuntime += MILLISECONDS_IN_MINUTE; } if (isStreamHCatEnabled(categoryName)) { pathsToBeregisteredPerTable.put(tableName, pathsToBeRegistered); } } } // prevRuntimeForCategory map is updated with commitTime, // even if prevRuntime is -1, since service did run at this point prevRuntimeForCategory.put(categoryName, commitTime); }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
protected boolean retriableExists(FileSystem fs, Path p, String streamName) throws Exception { int count = 0; boolean result = false; Exception ex = null;//w w w. j a va 2s .co m while (count < numOfRetries) { try { result = fs.exists(p); ex = null; break; } catch (Exception e) { LOG.warn("Error while checking for existence of " + p + " .Retrying ", e); ex = e; if (stopped) break; } count++; try { Thread.sleep(TIME_RETRY_IN_MILLIS); } catch (InterruptedException e) { LOG.error(e); } } if (count == numOfRetries) { LOG.error("Max retries done for mkdirs " + p + " quitting"); } if (ex == null) return result; else throw ex; }
From source file:com.inmobi.conduit.Conduit.java
License:Apache License
private void copyInputFormatJarToClusterFS(Cluster cluster, String inputFormatSrcJar) throws IOException { FileSystem clusterFS = FileSystem.get(cluster.getHadoopConf()); // create jars path inside /conduit/system/tmp path Path jarsPath = new Path(cluster.getTmpPath(), "jars"); if (!clusterFS.exists(jarsPath)) { clusterFS.mkdirs(jarsPath);//from ww w. ja v a2s. c o m } // copy inputFormat source jar into /conduit/system/tmp/jars path Path inputFormatJarDestPath = new Path(jarsPath, "conduit-distcp-current.jar"); if (clusterFS.exists(inputFormatJarDestPath)) { clusterFS.delete(inputFormatJarDestPath, true); } clusterFS.copyFromLocalFile(new Path(inputFormatSrcJar), inputFormatJarDestPath); }
From source file:com.inmobi.conduit.Conduit.java
License:Apache License
private void copyAuditUtilJarToClusterFs(Cluster cluster, String auditUtilSrcJar) throws IOException { FileSystem clusterFS = FileSystem.get(cluster.getHadoopConf()); // create jars path inside /conduit/system/tmp path Path jarsPath = new Path(cluster.getTmpPath(), "jars"); if (!clusterFS.exists(jarsPath)) { clusterFS.mkdirs(jarsPath);/* w w w .ja v a 2 s.c o m*/ } // copy AuditUtil source jar into /conduit/system/tmp/jars path Path AuditUtilJarDestPath = new Path(jarsPath, "messaging-client-core.jar"); if (clusterFS.exists(AuditUtilJarDestPath)) { clusterFS.delete(AuditUtilJarDestPath, true); } clusterFS.copyFromLocalFile(new Path(auditUtilSrcJar), AuditUtilJarDestPath); }
From source file:com.inmobi.conduit.distcp.MirrorStreamService.java
License:Apache License
/**
 * Picks the earliest or latest entry, as ordered by
 * {@code DatePathComparator}, among the statuses collected by
 * {@code recursiveListingTillMinuteDir} under {@code streamFinalDestDir}.
 *
 * @param fs file system to list
 * @param streamFinalDestDir root directory of the stream
 * @param returnLast true to pick the greatest entry, false for the smallest
 * @return the chosen entry's path when it is a directory, otherwise its
 *         parent; null when the root does not exist or nothing was collected
 * @throws IOException if a file system call fails
 */
private Path getFirstOrLastPath(FileSystem fs, Path streamFinalDestDir, boolean returnLast)
        throws IOException {
    if (!fs.exists(streamFinalDestDir)) {
        return null;
    }

    final List<FileStatus> candidates = new ArrayList<FileStatus>();
    final FileStatus root = fs.getFileStatus(streamFinalDestDir);
    recursiveListingTillMinuteDir(fs, root, candidates, 0);
    if (candidates.isEmpty()) {
        return null;
    }

    final DatePathComparator byDate = new DatePathComparator();
    FileStatus chosen = candidates.get(0);
    for (FileStatus candidate : candidates) {
        final int order = byDate.compare(candidate, chosen);
        // Strict comparison: on ties the earliest-listed entry is kept.
        final boolean better = returnLast ? order > 0 : order < 0;
        if (better) {
            chosen = candidate;
        }
    }

    return chosen.isDir() ? chosen.getPath() : chosen.getPath().getParent();
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Removes from the target every entry that is not present in the source
 * listing.
 * <p>
 * A listing of the target final path is built next to the source listing,
 * both listings are sorted, and the two sorted sequence files are walked in
 * step. Each target entry with no matching relative path in the source is
 * deleted recursively; an entry that no longer exists on the target counts
 * as deleted.
 *
 * @param conf job configuration supplying the listing file path and the
 *        target final path
 * @throws IOException if a listing cannot be built or read, or if a target
 *         entry cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    // Both readers are opened inside the try so that the first one is still
    // closed when opening the second one fails.
    SequenceFile.Reader sourceReader = null;
    SequenceFile.Reader targetReader = null;
    long deletedEntries = 0;
    try {
        sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
        targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            // Advance the source cursor until it reaches or passes the target entry.
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            // Present in source as well: keep it.
            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        // closeStream is expected to tolerate null (Hadoop IOUtils) for a
        // reader that was never opened.
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void commitData(Configuration conf) throws IOException { Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH)); FileSystem targetFS = workDir.getFileSystem(conf); LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir); if (targetFS.exists(finalDir) && targetFS.exists(workDir)) if (!targetFS.delete(finalDir, true)) { LOG.error("Unable to delete pre-existing final-data at " + finalDir); throw new IOException("Atomic commit failed. Pre-existing final data" + " in " + finalDir + " could not be cleared, before commit."); }//from ww w. j a v a2 s . c om boolean result = targetFS.rename(workDir, finalDir); if (!result) { LOG.warn("Rename failed. Perhaps data already moved. Verifying..."); result = targetFS.exists(finalDir) && !targetFS.exists(workDir); } if (result) { LOG.info("Data committed successfully to " + finalDir); HadoopCompat.setStatus(taskAttemptContext, "Data committed successfully to " + finalDir); } else { LOG.error("Unable to commit data to " + finalDir); throw new IOException( "Atomic commit failed. Temporary data in " + workDir + ", Unable to move to " + finalDir); } }