Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.toString().

Prototype

@Override
    public String toString() 

Source Link

Usage

From source file:cascading.flow.MapReduceFlow.java

License:Open Source License

/**
 * Builds one source tap for every input path configured on the given job.
 *
 * @param jobConf job configuration whose input paths are read through FileInputFormat
 * @return map keyed by each input path's string form, valued by an Hfs tap over that same string
 */
private Map<String, Tap> createSources(JobConf jobConf) {
    Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
    Map<String, Tap> sourcesByLocation = new HashMap<String, Tap>();

    for (Path inputPath : inputPaths) {
        // the same string serves as both the map key and the tap identifier
        String location = inputPath.toString();
        sourcesByLocation.put(location, new Hfs(new NullScheme(), location));
    }

    return sourcesByLocation;
}

From source file:cascading.flow.tez.Hadoop2TezFlowStep.java

License:Open Source License

/**
 * Makes sure the JobContext.WORKING_DIR property is present in the configuration.
 * When the property is already set it is left untouched; otherwise it is set to the
 * working directory reported by the file system obtained from the configuration.
 *
 * @param conf configuration to inspect and, if needed, update in place
 * @throws RuntimeException wrapping any IOException raised while resolving the working directory
 */
private static void setWorkingDirectory(Configuration conf) {
    if (conf.get(JobContext.WORKING_DIR) != null) {
        return; // an explicit value wins
    }

    try {
        FileSystem fileSystem = FileSystem.get(conf);
        Path workingDirectory = fileSystem.getWorkingDirectory();
        conf.set(JobContext.WORKING_DIR, workingDirectory.toString());
    } catch (IOException exception) {
        throw new RuntimeException(exception);
    }
}

From source file:cascading.flow.tez.util.TezUtil.java

License:Open Source License

/**
 * Stores the path of the reader's current split under MultiInputSplit.CASCADING_SOURCE_PATH
 * when that split is a file split. Nothing is written for any other kind of split.
 *
 * @param input         input whose "useNewApi" field selects between the mapreduce and mapred split types
 * @param reader        reader supplying the current split
 * @param configuration configuration that receives the source path
 */
public static void setSourcePathForSplit(MRInput input, MRReader reader, Configuration configuration) {
    Path sourcePath = null;

    if (Util.returnInstanceFieldIfExistsSafe(input, "useNewApi")) {
        // new (org.apache.hadoop.mapreduce) API
        org.apache.hadoop.mapreduce.InputSplit split =
                (org.apache.hadoop.mapreduce.InputSplit) reader.getSplit();

        if (split instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
            sourcePath = ((org.apache.hadoop.mapreduce.lib.input.FileSplit) split).getPath();
        }
    } else {
        // old (org.apache.hadoop.mapred) API
        org.apache.hadoop.mapred.InputSplit split =
                (org.apache.hadoop.mapred.InputSplit) reader.getSplit();

        if (split instanceof org.apache.hadoop.mapred.FileSplit) {
            sourcePath = ((org.apache.hadoop.mapred.FileSplit) split).getPath();
        }
    }

    if (sourcePath == null) {
        return; // not a file split, nothing to record
    }

    configuration.set(MultiInputSplit.CASCADING_SOURCE_PATH, sourcePath.toString());
}

From source file:cascading.tap.hadoop.BaseDistCacheTap.java

License:Open Source License

/**
 * Opens this tap for reading, preferring copies of the files found among the local cache files
 * over reading through the parent implementation.
 * <p>
 * The call is delegated to the parent when running in local mode, when a RecordReader is
 * supplied, when there are no local cache files, or when none of the cache files match the
 * path(s) of the underlying Hfs tap.
 *
 * @param flowProcess current flow process, used for its configuration and cache file lookup
 * @param input       optional record reader; when non-null the parent implementation is used
 * @return an iterator over the matching cached files, or the parent's iterator on fallback
 * @throws IOException if the file system or one of the taps cannot be opened
 */
@Override
public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input)
        throws IOException {
    // always read via Hadoop FileSystem if in standalone/local mode, or if a RecordReader is provided
    if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) {
        LOG.info("delegating to parent");
        return super.openForRead(flowProcess, input);
    }

    Path[] cachedFiles = getLocalCacheFiles(flowProcess);

    if (cachedFiles == null || cachedFiles.length == 0)
        return super.openForRead(flowProcess, null);

    List<Path> paths = new ArrayList<>();
    List<Tap> taps = new ArrayList<>();

    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(flowProcess.getConfig());
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        // globStatus can return null rather than an empty array; treat that as "no matches"
        if (statuses != null) {
            for (FileStatus status : statuses)
                paths.add(status.getPath());
        }
    } else {
        paths.add(getHfs().getPath());
    }

    // match cache files to the wanted paths by file name suffix
    for (Path pathToFind : paths) {
        for (Path path : cachedFiles) {
            if (path.toString().endsWith(pathToFind.getName())) {
                LOG.info("found {} in distributed cache", path);
                taps.add(new Lfs(getScheme(), path.toString()));
            }
        }
    }

    // Test the taps, not the paths: a non-empty path list with no cache match would otherwise
    // open a MultiSourceTap over zero taps instead of falling back to the parent.
    if (taps.isEmpty()) // not in cache, read from HDFS
    {
        LOG.info("could not find files in local resource path. delegating to parent: {}",
                super.getIdentifier());
        return super.openForRead(flowProcess, input);
    }

    return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input);
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf/*w  w  w .j a v a 2 s  . c om*/
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id");

    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);

    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;

        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - "
                        + taskOutputPath);

            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Resolves the given output directory against the job's working directory and stores the
 * resulting path string under "mapred.work.output.dir".
 *
 * @param conf      job configuration supplying the working directory and receiving the property
 * @param outputDir output directory to resolve
 */
static void setWorkOutputPath(JobConf conf, Path outputDir) {
    Path resolvedOutputDir = new Path(conf.getWorkingDirectory(), outputDir);
    String resolvedLocation = resolvedOutputDir.toString();

    conf.set("mapred.work.output.dir", resolvedLocation);
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Creates the job specific temporary directory (TEMPORARY_PATH) beneath the job's output path.
 * Does nothing when the job has no output path or the directory already exists; a failed
 * creation is logged as an error rather than thrown.
 *
 * @param conf job configuration supplying the output path and file system
 * @throws IOException if the file system cannot be obtained or queried
 */
public static void makeTempPath(JobConf conf) throws IOException {
    Path jobOutputPath = FileOutputFormat.getOutputPath(conf);

    if (jobOutputPath == null) {
        return;
    }

    Path temporaryDir = new Path(jobOutputPath, TEMPORARY_PATH);
    FileSystem fileSystem = temporaryDir.getFileSystem(conf);

    if (fileSystem.exists(temporaryDir)) {
        return;
    }

    if (!fileSystem.mkdirs(temporaryDir)) {
        LOG.error("mkdirs failed to create " + temporaryDir.toString());
    }
}

From source file:cascading.tap.hadoop.Hfs.java

License:Open Source License

/**
 * Deletes a child resource, provided its qualified path string starts with this tap's
 * full identifier.
 *
 * @param conf            configuration used to resolve the file system and full identifier
 * @param childIdentifier identifier of the child resource to delete
 * @return false when the child does not start with the full identifier, otherwise the
 *         result of deleteFullIdentifier
 * @throws IOException if the file system cannot be accessed
 */
public boolean deleteChildResource(Configuration conf, String childIdentifier) throws IOException {
    Path qualifiedChild = new Path(childIdentifier).makeQualified(getFileSystem(conf));
    String childLocation = qualifiedChild.toString();

    if (childLocation.startsWith(getFullIdentifier(conf))) {
        return deleteFullIdentifier(conf, childLocation);
    }

    return false;
}

From source file:cascading.tap.hadoop.Hfs.java

License:Open Source License

/**
 * Recursively collects child paths located exactly {@code depth} levels below {@code path}.
 * <p>
 * At depth zero the first {@code trim} characters are stripped from the path string and the
 * remainder is added to {@code results}, resolved against getIdentifier() when that is
 * non-empty. At any other depth the non-hidden entries of {@code path} are listed and visited
 * with the depth reduced by one.
 *
 * @param conf    configuration used to obtain the file system
 * @param results set receiving the collected path strings
 * @param trim    number of leading characters to drop from each collected path string
 * @param path    path currently being visited
 * @param depth   remaining number of levels to descend
 * @throws IOException if listing a directory fails
 */
private void getChildPaths(Configuration conf, Set<String> results, int trim, Path path, int depth)
        throws IOException {
    if (depth != 0) {
        FileStatus[] children = getFileSystem(conf).listStatus(path, HIDDEN_FILES_FILTER);

        if (children != null) {
            for (FileStatus child : children) {
                getChildPaths(conf, results, trim, child.getPath(), depth - 1);
            }
        }

        return;
    }

    String relative = path.toString().substring(trim);
    String base = getIdentifier();
    boolean hasBase = base != null && !base.isEmpty();
    Path childPath = hasBase ? new Path(base, relative) : new Path(relative);

    results.add(childPath.toString());
}

From source file:cascading.tap.hadoop.HttpFileSystem.java

License:Open Source License

/**
 * Converts a path into a URL for this file system.
 * <p>
 * A path whose string form already starts with {@code scheme} is parsed as-is; otherwise the
 * URL is assembled from {@code scheme}, {@code authority} and the path string.
 *
 * @param path path to convert
 * @return the URL corresponding to the path
 * @throws IOException if the assembled URI is syntactically invalid (the URISyntaxException is
 *                     kept as the cause) or cannot be converted to a URL
 */
private URL makeUrl(Path path) throws IOException {
    String location = path.toString();

    if (location.startsWith(scheme))
        return URI.create(location).toURL();

    try {
        return new URI(scheme, authority, location, null, null).toURL();
    } catch (URISyntaxException exception) {
        // chain the cause so the original stack trace is not lost
        throw new IOException(exception.getMessage(), exception);
    }
}