List of usage examples for org.apache.hadoop.fs.Path#equals(Object)
@Override public boolean equals(Object o)
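Before the project-specific examples, a minimal self-contained sketch (not taken from any of the sources below; the paths are hypothetical) of what Path.equals compares: equality is URI-based, so two unqualified paths that resolve to the same string are equal, while a fully qualified path with a scheme and authority is not equal to its unqualified form.

    import org.apache.hadoop.fs.Path;

    public class PathEqualsSketch {
        public static void main(String[] args) {
            Path a = new Path("/user/demo/data.txt");
            Path b = new Path("/user/demo", "data.txt");                  // parent + child form
            Path c = new Path("hdfs://namenode:8020/user/demo/data.txt"); // fully qualified

            System.out.println(a.equals(b)); // true: both resolve to the unqualified path /user/demo/data.txt
            System.out.println(a.equals(c)); // false: the scheme and authority make the URIs differ
        }
    }

This is why several of the examples below compare paths only after constructing both sides the same way (e.g. new Path(actionDir, name)) or after qualifying a key-derived path against the filesystem's URI.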
From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java
License:Apache License
/**
 * Utility function to load the contents of action data sequence file into
 * memory object
 *
 * @param fs Action Filesystem
 * @param actionDir Path
 * @param conf Configuration
 * @return Map action data
 * @throws IOException
 * @throws InterruptedException
 */
public static Map<String, String> getActionData(final FileSystem fs, final Path actionDir,
        final Configuration conf) throws IOException, InterruptedException {
    UserGroupInformationService ugiService = Services.get().get(UserGroupInformationService.class);
    UserGroupInformation ugi = ugiService.getProxyUser(conf.get(OozieClient.USER_NAME));
    return ugi.doAs(new PrivilegedExceptionAction<Map<String, String>>() {
        @Override
        public Map<String, String> run() throws IOException {
            Map<String, String> ret = new HashMap<String, String>();
            Path seqFilePath = getActionDataSequenceFilePath(actionDir);
            if (fs.exists(seqFilePath)) {
                SequenceFile.Reader seqFile = new SequenceFile.Reader(fs, seqFilePath, conf);
                Text key = new Text(), value = new Text();
                while (seqFile.next(key, value)) {
                    ret.put(key.toString(), value.toString());
                }
                seqFile.close();
            } else {
                // maintain backward-compatibility. to be deprecated
                org.apache.hadoop.fs.FileStatus[] files = fs.listStatus(actionDir);
                InputStream is;
                BufferedReader reader = null;
                Properties props;
                if (files != null && files.length > 0) {
                    for (int x = 0; x < files.length; x++) {
                        Path file = files[x].getPath();
                        if (file.equals(new Path(actionDir, "externalChildIds.properties"))) {
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS,
                                    IOUtils.getReaderAsString(reader, -1));
                        } else if (file.equals(new Path(actionDir, "newId.properties"))) {
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            props = PropertiesUtils.readProperties(reader, -1);
                            ret.put(LauncherMapper.ACTION_DATA_NEW_ID, props.getProperty("id"));
                        } else if (file.equals(new Path(actionDir, LauncherMapper.ACTION_DATA_OUTPUT_PROPS))) {
                            int maxOutputData = conf.getInt(LauncherMapper.CONF_OOZIE_ACTION_MAX_OUTPUT_DATA,
                                    2 * 1024);
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_OUTPUT_PROPS, PropertiesUtils
                                    .propertiesToString(PropertiesUtils.readProperties(reader, maxOutputData)));
                        } else if (file.equals(new Path(actionDir, LauncherMapper.ACTION_DATA_STATS))) {
                            int statsMaxOutputData = conf.getInt(
                                    LauncherMapper.CONF_OOZIE_EXTERNAL_STATS_MAX_SIZE, Integer.MAX_VALUE);
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_STATS, PropertiesUtils.propertiesToString(
                                    PropertiesUtils.readProperties(reader, statsMaxOutputData)));
                        } else if (file.equals(new Path(actionDir, LauncherMapper.ACTION_DATA_ERROR_PROPS))) {
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_ERROR_PROPS,
                                    IOUtils.getReaderAsString(reader, -1));
                        }
                    }
                }
            }
            return ret;
        }
    });
}
From source file:org.apache.oozie.action.hadoop.TestJavaActionExecutor.java
License:Apache License
public void testAddToCache() throws Exception {
    JavaActionExecutor ae = new JavaActionExecutor();
    Configuration conf = new XConfiguration();

    Path appPath = new Path(getFsTestCaseDir(), "wf");
    URI appUri = appPath.toUri();

    // test archive without fragment
    Path archivePath = new Path("test.jar");
    Path archiveFullPath = new Path(appPath, archivePath);
    ae.addToCache(conf, appPath, archiveFullPath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(archiveFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test archive with fragment
    Path archiveFragmentPath = new Path("test.jar#a.jar");
    Path archiveFragmentFullPath = new Path(appPath, archiveFragmentPath);
    conf.clear();
    ae.addToCache(conf, appPath, archiveFragmentFullPath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(archiveFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .so without fragment
    Path appSoPath = new Path("lib/a.so");
    Path appSoFullPath = new Path(appPath, appSoPath);
    conf.clear();
    ae.addToCache(conf, appPath, appSoFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appSoFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .so with fragment
    Path appSoFragmentPath = new Path("lib/a.so#a.so");
    Path appSoFragmentFullPath = new Path(appPath, appSoFragmentPath);
    conf.clear();
    ae.addToCache(conf, appPath, appSoFragmentFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appSoFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .jar without fragment where app path is on same cluster as jar path
    Path appJarPath = new Path("lib/a.jar");
    Path appJarFullPath = new Path(appPath, appJarPath);
    conf = new Configuration();
    conf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    ae.addToCache(conf, appPath, appJarFullPath.toString(), false);
    // assert that mapred.cache.files contains jar URI path (full on Hadoop-2)
    Path jarPath = HadoopShims.isYARN() ? new Path(appJarFullPath.toUri())
                                        : new Path(appJarFullPath.toUri().getPath());
    assertTrue(conf.get("mapred.cache.files").contains(jarPath.toString()));
    // assert that dist cache classpath contains jar URI path
    Path[] paths = DistributedCache.getFileClassPaths(conf);
    boolean pathFound = false;
    for (Path path : paths) {
        if (path.equals(jarPath)) {
            pathFound = true;
            break;
        }
    }
    assertTrue(pathFound);
    assertTrue(DistributedCache.getSymlink(conf));

    // test .jar without fragment where app path is on a different cluster than jar path
    appJarPath = new Path("lib/a.jar");
    appJarFullPath = new Path(appPath, appJarPath);
    Path appDifferentClusterPath = new Path(new URI(appUri.getScheme(), null, appUri.getHost() + "x",
            appUri.getPort(), appUri.getPath(), appUri.getQuery(), appUri.getFragment()));
    conf.clear();
    conf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    ae.addToCache(conf, appDifferentClusterPath, appJarFullPath.toString(), false);
    // assert that mapred.cache.files contains absolute jar URI
    assertTrue(conf.get("mapred.cache.files").contains(appJarFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .jar with fragment
    Path appJarFragmentPath = new Path("lib/a.jar#a.jar");
    Path appJarFragmentFullPath = new Path(appPath, appJarFragmentPath);
    conf.clear();
    conf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    ae.addToCache(conf, appPath, appJarFragmentFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appJarFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test regular file without fragment
    Path appFilePath = new Path("lib/a.txt");
    Path appFileFullPath = new Path(appPath, appFilePath);
    conf.clear();
    ae.addToCache(conf, appPath, appFileFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appFileFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test regular file with fragment
    Path appFileFragmentPath = new Path("lib/a.txt#a.txt");
    Path appFileFragmentFullPath = new Path(appPath, appFileFragmentPath);
    conf.clear();
    ae.addToCache(conf, appPath, appFileFragmentFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appFileFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test path starting with "/" for archive
    Path testPath = new Path("/tmp/testpath/a.jar#a.jar");
    conf.clear();
    ae.addToCache(conf, appPath, testPath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(testPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test path starting with "/" for cache.file
    conf.clear();
    ae.addToCache(conf, appPath, testPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(testPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test absolute path for archive
    Path testAbsolutePath = new Path("hftp://namenode.test.com:8020/tmp/testpath/a.jar#a.jar");
    conf.clear();
    ae.addToCache(conf, appPath, testAbsolutePath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(testAbsolutePath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test absolute path for cache files
    conf.clear();
    ae.addToCache(conf, appPath, testAbsolutePath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(testAbsolutePath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test relative path for archive
    conf.clear();
    ae.addToCache(conf, appPath, "lib/a.jar#a.jar", true);
    assertTrue(conf.get("mapred.cache.archives").contains(appUri.getPath() + "/lib/a.jar#a.jar"));
    assertTrue(DistributedCache.getSymlink(conf));

    // test relative path for cache files
    conf.clear();
    ae.addToCache(conf, appPath, "lib/a.jar#a.jar", false);
    assertTrue(conf.get("mapred.cache.files").contains(appUri.getPath() + "/lib/a.jar#a.jar"));
    assertTrue(DistributedCache.getSymlink(conf));
}
From source file:org.apache.reef.runtime.yarn.driver.restart.DFSEvaluatorLogOverwriteReaderWriter.java
License:Apache License
/**
 * Gets the alternative path. Returns one of changeLogPath and changeLogAltPath.
 */
private synchronized Path getAlternativePath(final Path path) {
    if (path.equals(changeLogPath)) {
        return changeLogAltPath;
    }
    return changeLogPath;
}
From source file:org.apache.spark.network.yarn.YarnShuffleService.java
License:Apache License
/**
 * Figure out the recovery path and handle moving the DB if YARN NM recovery gets enabled
 * and the DB exists in the NM local dir from an old version of the shuffle service.
 */
protected File initRecoveryDb(String dbName) {
    Preconditions.checkNotNull(_recoveryPath,
            "recovery path should not be null if NM recovery is enabled");

    File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbName);
    if (recoveryFile.exists()) {
        return recoveryFile;
    }

    // db doesn't exist in recovery path, go check local dirs for it
    String[] localDirs = _conf.getTrimmedStrings("yarn.nodemanager.local-dirs");
    for (String dir : localDirs) {
        File f = new File(new Path(dir).toUri().getPath(), dbName);
        if (f.exists()) {
            // If the recovery path is set then either NM recovery is enabled or another recovery
            // DB has been initialized. If NM recovery is enabled and had set the recovery path
            // make sure to move all DBs to the recovery path from the old NM local dirs.
            // If another DB was initialized first just make sure all the DBs are in the same
            // location.
            Path newLoc = new Path(_recoveryPath, dbName);
            Path copyFrom = new Path(f.toURI());
            if (!newLoc.equals(copyFrom)) {
                logger.info("Moving " + copyFrom + " to: " + newLoc);
                try {
                    // The move here needs to handle moving non-empty directories across NFS mounts
                    FileSystem fs = FileSystem.getLocal(_conf);
                    fs.rename(copyFrom, newLoc);
                } catch (Exception e) {
                    // Failed to move recovery file to new path, just continue on with new DB location
                    logger.error("Failed to move recovery file {} to the path {}",
                            dbName, _recoveryPath.toString(), e);
                }
            }
            return new File(newLoc.toUri().getPath());
        }
    }

    return new File(_recoveryPath.toUri().getPath(), dbName);
}
From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java
License:Apache License
/**
 * Specific method because we need to set the input converter class according to the
 * input infos. Note that any mapper instruction before reblock can work on binary block
 * if it can work on binary cell as well.
 *
 * @param job job configuration
 * @param inputIndexes array of byte indexes
 * @param inputs array of input strings
 * @param inputInfos array of input infos
 * @param brlens array of block row lengths
 * @param bclens array of block column lengths
 * @throws Exception if Exception occurs
 */
public static void setUpMultipleInputsReblock(JobConf job, byte[] inputIndexes, String[] inputs,
        InputInfo[] inputInfos, int[] brlens, int[] bclens) throws Exception {
    if (inputs.length != inputInfos.length)
        throw new Exception("number of inputs and inputInfos does not match");

    // set up names of the input matrices and their inputformat information
    job.setStrings(INPUT_MATRICIES_DIRS_CONFIG, inputs);
    MRJobConfiguration.setMapFunctionInputMatrixIndexes(job, inputIndexes);
    for (int i = 0; i < inputs.length; i++) {
        ConvertTarget target = ConvertTarget.CELL;
        if (inputInfos[i] == InputInfo.BinaryBlockInputInfo)
            target = ConvertTarget.BLOCK;
        setInputInfo(job, inputIndexes[i], inputInfos[i], brlens[i], bclens[i], target);
    }

    // remove redundant input files
    ArrayList<Path> paths = new ArrayList<>();
    for (int i = 0; i < inputs.length; i++) {
        String name = inputs[i];
        Path p = new Path(name);
        boolean redundant = false;
        for (Path ep : paths)
            if (ep.equals(p)) {
                redundant = true;
                break;
            }
        if (redundant)
            continue;
        MultipleInputs.addInputPath(job, p, inputInfos[i].inputFormatClass);
        paths.add(p);
    }
}
From source file:org.apache.tajo.storage.s3.InMemoryFileSystemStore.java
License:Apache License
@Override
public Set<Path> listSubPaths(Path path) throws IOException {
    Path normalizedPath = normalize(path);
    // This is inefficient but more than adequate for testing purposes.
    Set<Path> subPaths = new LinkedHashSet<Path>();
    for (Path p : inodes.tailMap(normalizedPath).keySet()) {
        if (normalizedPath.equals(p.getParent())) {
            subPaths.add(p);
        }
    }
    return subPaths;
}
From source file:org.apache.tajo.storage.s3.S3TableSpace.java
License:Apache License
/**
 * Calculate the total size of all objects in the indicated bucket
 *
 * @param path to use
 * @return calculated size
 * @throws IOException
 */
@Override
public long calculateSize(Path path) throws IOException {
    long totalBucketSize = 0L;

    if (s3Enabled) {
        String key = pathToKey(path);

        final FileStatus fileStatus = fs.getFileStatus(path);

        if (fileStatus.isDirectory()) {
            if (!key.isEmpty()) {
                key = key + "/";
            }

            ListObjectsRequest request = new ListObjectsRequest();
            request.setBucketName(uri.getHost());
            request.setPrefix(key);
            request.setMaxKeys(maxKeys);

            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: doing listObjects for directory " + key);
            }

            ObjectListing objects = s3.listObjects(request);

            while (true) {
                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, fs.getWorkingDirectory());

                    // Skip over keys that are ourselves and old S3N _$folder$ files
                    if (keyPath.equals(path) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Ignoring: " + keyPath);
                        }
                        continue;
                    }

                    if (!objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                        totalBucketSize += summary.getSize();
                    }
                }

                if (objects.isTruncated()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("listStatus: list truncated - getting next batch");
                    }
                    objects = s3.listNextBatchOfObjects(objects);
                } else {
                    break;
                }
            }
        } else {
            return fileStatus.getLen();
        }
    } else {
        totalBucketSize = fs.getContentSummary(path).getLength();
    }

    return totalBucketSize;
}
From source file:org.bgi.flexlab.gaea.data.mapreduce.input.bam.GaeaBamInputFormat.java
License:Open Source License
private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    Path file = ((FileSplit) splits.get(i)).getPath();

    SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; j++) {
        if (!file.equals(((FileSplit) splits.get(j)).getPath()))
            splitsEnd = j;
    }

    for (int j = i; j < splitsEnd; j++) {
        FileSplit fileSplit = (FileSplit) splits.get(j);
        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        Long blockStart = idx.nextAlignment(start);
        Long blockEnd = Long.valueOf(j == splitsEnd - 1 ? idx.prevAlignment(end).longValue() | 0xFFFF
                : idx.nextAlignment(end).longValue());

        if (blockStart == null) {
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);
        }
        if (blockEnd == null) {
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);
        }
        newSplits.add(new FileVirtualSplit(file, blockStart.longValue(), blockEnd.longValue(),
                fileSplit.getLocations()));
    }
    return splitsEnd;
}
From source file:org.broadinstitute.sting.gatk.hadoop.BAMInputFormat.java
License:Open Source License
private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    final Path file = ((FileSplit) splits.get(i)).getPath();

    final SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; ++j)
        if (!file.equals(((FileSplit) splits.get(j)).getPath()))
            splitsEnd = j;

    for (int j = i; j < splitsEnd; ++j) {
        final FileSplit fileSplit = (FileSplit) splits.get(j);
        final long start = fileSplit.getStart();
        final long end = start + fileSplit.getLength();

        final Long blockStart = idx.nextAlignment(start);

        // The last split needs to end where the last alignment ends, but
        // the index doesn't store that data (whoops); we only know where the
        // last alignment begins. Fortunately there's no need to change the index
        // format for this: we can just set the end to the maximal length of
        // the final BGZF block (0xffff), and then read until BAMRecordCodec
        // hits EOF.
        final Long blockEnd = j == splitsEnd - 1 ? idx.prevAlignment(end) | 0xffff : idx.nextAlignment(end);

        if (blockStart == null)
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);
        if (blockEnd == null)
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);

        newSplits.add(new FileVirtualSplit(file, blockStart, blockEnd, fileSplit.getLocations()));
    }
    return splitsEnd;
}
From source file:org.broadinstitute.sting.gatk.hadoop.LociInputFormat.java
License:Open Source License
private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    final Path file = ((FileSplit) splits.get(i)).getPath();

    final SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; ++j)
        if (!file.equals(((FileSplit) splits.get(j)).getPath()))
            splitsEnd = j;

    for (int j = i; j < splitsEnd; ++j) {
        final FileSplit fileSplit = (FileSplit) splits.get(j);
        final long start = fileSplit.getStart();
        final long end = start + fileSplit.getLength();

        final Long blockStart = idx.nextAlignment(start);

        // The last split needs to end where the last alignment ends, but
        // the index doesn't store that data (whoops); we only know where the
        // last alignment begins. Fortunately there's no need to change the index
        // format for this: we can just set the end to the maximal length of
        // the final BGZF block (0xffff), and then read until BAMRecordCodec
        // hits EOF.
        Long blockEnd = // j == splitsEnd-1 ? idx.prevAlignment(end) | 0xffff
                j == splitsEnd - 1 ? idx.prevAlignment(end) : idx.nextAlignment(end);

        if (blockStart == null)
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);
        if (blockEnd == null)
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);

        newSplits.add(new FileVirtualSplit(file, blockStart, blockEnd, fileSplit.getLocations()));
    }
    return splitsEnd;
}