List of usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
From source file:cascading.flow.MapReduceFlow.java
License:Open Source License
/**
 * Builds one source tap for every input path configured on the given job.
 *
 * @param jobConf job configuration whose input paths are read through
 *                {@code FileInputFormat.getInputPaths}
 * @return a map keyed by each input path's string form, whose value is an
 *         {@code Hfs} tap (with a {@code NullScheme}) over that same path
 */
private Map<String, Tap> createSources(JobConf jobConf) {
    Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
    Map<String, Tap> sources = new HashMap<String, Tap>();

    for (Path inputPath : inputPaths) {
        // The same string is used both as the map key and as the tap identifier.
        String location = inputPath.toString();
        sources.put(location, new Hfs(new NullScheme(), location));
    }

    return sources;
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
private static void setWorkingDirectory(Configuration conf) { String name = conf.get(JobContext.WORKING_DIR); if (name != null) return;//from w w w . j a v a 2s . c om try { Path dir = FileSystem.get(conf).getWorkingDirectory(); conf.set(JobContext.WORKING_DIR, dir.toString()); } catch (IOException exception) { throw new RuntimeException(exception); } }
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static void setSourcePathForSplit(MRInput input, MRReader reader, Configuration configuration) { Path path = null; if (Util.returnInstanceFieldIfExistsSafe(input, "useNewApi")) { org.apache.hadoop.mapreduce.InputSplit newInputSplit = (org.apache.hadoop.mapreduce.InputSplit) reader .getSplit();//from w w w .ja v a 2s .co m if (newInputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) path = ((org.apache.hadoop.mapreduce.lib.input.FileSplit) newInputSplit).getPath(); } else { org.apache.hadoop.mapred.InputSplit oldInputSplit = (org.apache.hadoop.mapred.InputSplit) reader .getSplit(); if (oldInputSplit instanceof org.apache.hadoop.mapred.FileSplit) path = ((org.apache.hadoop.mapred.FileSplit) oldInputSplit).getPath(); } if (path != null) configuration.set(MultiInputSplit.CASCADING_SOURCE_PATH, path.toString()); }
From source file:cascading.tap.hadoop.BaseDistCacheTap.java
License:Open Source License
@Override public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input) throws IOException { // always read via Hadoop FileSystem if in standalone/local mode, or if an RecordReader is provided if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) { LOG.info("delegating to parent"); return super.openForRead(flowProcess, input); }// w w w . ja v a 2 s.c om Path[] cachedFiles = getLocalCacheFiles(flowProcess); if (cachedFiles == null || cachedFiles.length == 0) return super.openForRead(flowProcess, null); List<Path> paths = new ArrayList<>(); List<Tap> taps = new ArrayList<>(); if (isSimpleGlob()) { FileSystem fs = FileSystem.get(flowProcess.getConfig()); FileStatus[] statuses = fs.globStatus(getHfs().getPath()); for (FileStatus status : statuses) paths.add(status.getPath()); } else { paths.add(getHfs().getPath()); } for (Path pathToFind : paths) { for (Path path : cachedFiles) { if (path.toString().endsWith(pathToFind.getName())) { LOG.info("found {} in distributed cache", path); taps.add(new Lfs(getScheme(), path.toString())); } } } if (paths.isEmpty()) // not in cache, read from HDFS { LOG.info("could not find files in local resource path. delegating to parent: {}", super.getIdentifier()); return super.openForRead(flowProcess, input); } return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input); }
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
/** * copies all files from the taskoutputpath to the outputpath * * @param conf/*w w w .j a v a 2 s . c om*/ */ public static void commitTask(JobConf conf) throws IOException { Path taskOutputPath = new Path(conf.get("mapred.work.output.dir")); FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return; AtomicInteger integer = pathCounts.get(taskOutputPath.toString()); if (integer.decrementAndGet() != 0) return; String taskId = conf.get("mapred.task.id"); LOG.info("committing task: '" + taskId + "' - " + taskOutputPath); if (taskOutputPath != null) { if (writeDirectlyToWorkingPath(conf, taskOutputPath)) return; if (fs.exists(taskOutputPath)) { Path jobOutputPath = taskOutputPath.getParent().getParent(); // Move the task outputs to their final place moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutputPath, true)) LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - " + taskOutputPath); LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath); } } }
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
/**
 * Stores the work output directory under {@code "mapred.work.output.dir"}.
 * <p>
 * The given directory is first resolved against the job's working directory.
 *
 * @param conf      job configuration to update
 * @param outputDir output directory, resolved against {@code conf.getWorkingDirectory()}
 */
static void setWorkOutputPath(JobConf conf, Path outputDir) {
    Path resolvedOutputDir = new Path(conf.getWorkingDirectory(), outputDir);
    String location = resolvedOutputDir.toString();

    conf.set("mapred.work.output.dir", location);
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
public static void makeTempPath(JobConf conf) throws IOException { // create job specific temporary directory in output path Path outputPath = FileOutputFormat.getOutputPath(conf); if (outputPath != null) { Path tmpDir = new Path(outputPath, TEMPORARY_PATH); FileSystem fileSys = tmpDir.getFileSystem(conf); if (!fileSys.exists(tmpDir) && !fileSys.mkdirs(tmpDir)) { LOG.error("mkdirs failed to create " + tmpDir.toString()); }/*from w w w .j ava2s.co m*/ } }
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
/**
 * Deletes a child resource, provided it lies under this tap's full identifier.
 * <p>
 * The child identifier is qualified against this tap's file system; it counts
 * as a child when the qualified string starts with {@code getFullIdentifier(conf)}.
 *
 * @param conf            configuration used to resolve the file system and identifier
 * @param childIdentifier identifier of the resource to delete
 * @return {@code false} when the resource is not under this tap; otherwise the
 *         result of {@code deleteFullIdentifier}
 * @throws IOException if the file system operation fails
 */
public boolean deleteChildResource(Configuration conf, String childIdentifier) throws IOException {
    Path qualifiedChild = new Path(childIdentifier).makeQualified(getFileSystem(conf));
    String childLocation = qualifiedChild.toString();

    if (childLocation.startsWith(getFullIdentifier(conf))) {
        return deleteFullIdentifier(conf, childLocation);
    }

    return false;
}
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
/**
 * Recursively collects the identifiers of the entries found {@code depth}
 * levels below {@code path}.
 * <p>
 * While {@code depth} is non-zero, the entries listed under {@code path}
 * (filtered by {@code HIDDEN_FILES_FILTER}) are visited with {@code depth - 1}.
 * At depth zero, the first {@code trim} characters are cut from the path
 * string and the remainder is added to {@code results}, resolved against
 * {@code getIdentifier()} when that identifier is non-empty.
 *
 * @param conf    configuration used to obtain the file system
 * @param results set receiving the collected path strings
 * @param trim    number of leading characters to drop from each path string
 * @param path    path currently being visited
 * @param depth   remaining number of levels to descend
 * @throws IOException if listing a path fails
 */
private void getChildPaths(Configuration conf, Set<String> results, int trim, Path path, int depth)
        throws IOException {
    if (depth != 0) {
        FileStatus[] children = getFileSystem(conf).listStatus(path, HIDDEN_FILES_FILTER);

        if (children != null) {
            for (FileStatus child : children) {
                getChildPaths(conf, results, trim, child.getPath(), depth - 1);
            }
        }

        return;
    }

    String relative = path.toString().substring(trim);
    String parent = getIdentifier();

    // Without a usable identifier the trimmed path stands on its own.
    Path resolved = (parent == null || parent.isEmpty())
            ? new Path(relative)
            : new Path(parent, relative);

    results.add(resolved.toString());
}
From source file:cascading.tap.hadoop.HttpFileSystem.java
License:Open Source License
/**
 * Converts a Hadoop path into a URL for this file system.
 * <p>
 * A path whose string form already starts with this file system's scheme is
 * converted as-is; any other path is combined with the file system's scheme
 * and authority.
 *
 * @param path the path to convert
 * @return the URL corresponding to {@code path}
 * @throws IOException if the path cannot be turned into a valid URI/URL; a
 *                     {@link URISyntaxException} is preserved as the cause
 */
private URL makeUrl(Path path) throws IOException {
    String location = path.toString();

    if (location.startsWith(scheme)) {
        return URI.create(location).toURL();
    }

    try {
        return new URI(scheme, authority, location, null, null).toURL();
    } catch (URISyntaxException exception) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IOException(exception.getMessage(), exception);
    }
}