Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualifies a path to one that uses this FileSystem and, if the path is relative, makes it absolute.
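
For example, on a cluster whose default file system is HDFS, qualifying a relative path prepends the file system's scheme, authority, and current working directory. A minimal sketch (the namenode URI and user directory in the comments are illustrative, not part of the API):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A relative, scheme-less path...
        Path relative = new Path("data/input");

        // ...gains the FS scheme, authority, and working directory, e.g.
        // hdfs://namenode:8020/user/alice/data/input on a default setup.
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}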

Usage

From source file: org.notmysock.tez.BroadcastTest.java

License: Apache License

public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
    System.out.println("Running BroadcastTest");
    // conf and UGI
    TezConfiguration tezConf;
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
            + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = fs.makeQualified(stagingDir);

    Path jobJar = new Path(stagingDir, "job.jar");
    fs.copyFromLocalFile(getCurrentJarURL(), jobJar);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put("job.jar", createLocalResource(fs, jobJar));

    TezClient tezSession = null;
    // needs session or else TaskScheduler does not hold onto containers
    tezSession = TezClient.create("BroadcastTest", tezConf);
    tezSession.addAppMasterLocalFiles(localResources);
    tezSession.start();

    DAGClient dagClient = null;

    try {
        DAG dag = createDAG(fs, tezConf, stagingDir, localResources);

        dag.addTaskLocalFiles(localResources);

        tezSession.waitTillReady();
        dagClient = tezSession.submitDAG(dag);

        // monitoring
        DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
}

From source file: org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java

License: Apache License

/**
 * Adds a file path to the current set of classpath entries. It adds the file
 * to the cache as well.
 *
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator
 * for the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 */
public void addFileToClassPath(Path file, Configuration conf) throws IOException {

    // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around
    if (VersionInfo.getVersion().contains("0.21")) {
        DistributedCache.addFileToClassPath(file, conf);
    } else {
        String classpath = conf.get("mapred.job.classpath.files");
        conf.set("mapred.job.classpath.files",
                classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
        FileSystem fs = FileSystem.get(conf);
        URI uri = fs.makeQualified(file).toUri();

        DistributedCache.addCacheFile(uri, conf);
    }
}
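
The makeQualified call in this pattern matters because DistributedCache URIs travel to remote task JVMs, where a scheme-less path would be resolved against whatever file system the task treats as its default. Qualifying first pins the URI to the submitting FileSystem. A minimal sketch of the difference (the jar path and the namenode URI in the comment are hypothetical):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyBeforeCaching {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        Path bare = new Path("/tmp/lib/app.jar"); // hypothetical jar location
        URI bareUri = bare.toUri();               // no scheme or authority

        // The qualified form names the file system explicitly, e.g.
        // hdfs://namenode:8020/tmp/lib/app.jar on a default HDFS setup.
        URI qualifiedUri = fs.makeQualified(bare).toUri();

        System.out.println(bareUri);
        System.out.println(qualifiedUri);
    }
}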

From source file: org.pentaho.hadoop.shim.cdh50.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}
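
The overridden addFileToClassPath uses an append-or-initialize ternary to accumulate entries in the mapred.job.classpath.files property. A minimal sketch of just that accumulation, extracted for clarity (the jar paths are hypothetical):

import org.apache.hadoop.conf.Configuration;

public class ClasspathAppendSketch {
    // Mirrors the ternary used in addFileToClassPath above.
    static void append(Configuration conf, String entry, String separator) {
        String classpath = conf.get("mapred.job.classpath.files");
        conf.set("mapred.job.classpath.files",
                classpath == null ? entry : classpath + separator + entry);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        append(conf, "/cache/a.jar", ","); // property unset: initialized
        append(conf, "/cache/b.jar", ","); // appended with the separator
        System.out.println(conf.get("mapred.job.classpath.files"));
        // prints: /cache/a.jar,/cache/b.jar
    }
}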

From source file: org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java

License: Apache License

/**
 * Adds a file path to the current set of classpath entries. It adds the file to the cache as well.
 * <p/>
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator for
 * the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 */
public void addFileToClassPath(Path file, Configuration conf) throws IOException {

    // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around

    // Save off the classloader, to make sure the version info can be loaded successfully from the hadoop-common JAR
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(VersionInfo.class.getClassLoader());

    // Get the version string or set to a default value
    String version;
    try {
        version = VersionInfo.getVersion();
    } catch (Throwable t) {
        version = "unknown";
    }

    // Restore the original classloader
    Thread.currentThread().setContextClassLoader(cl);

    String classpath = conf.get("mapred.job.classpath.files");
    conf.set("mapred.job.classpath.files",
            classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
    FileSystem fs = FileSystem.get(conf);
    URI uri = fs.makeQualified(file).toUri();

    DistributedCache.addCacheFile(uri, conf);
}

From source file: org.pentaho.hadoop.shim.common.HadoopShimImpl.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    validateHadoopHomeWithWinutils();
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(file.toUri(), conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}

From source file: org.pentaho.hadoop.shim.emr32.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();
            //Job.getInstance( conf ).addCacheFile( uri );
            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }

    });
    if (!fsm.hasProvider("s3n")) {
        fsm.addProvider(config, "s3n", config.getIdentifier(), new HDFSFileProvider());
    }

}

From source file: org.pentaho.hadoop.shim.hdp20.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}

From source file: org.pentaho.hadoop.shim.hsp101.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {
        /**
         * Default permission for cached files
         * <p/>
         * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
         */
        private final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755);

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        /**
         * Stages the source file or folder to a Hadoop file system and sets its permission and replication
         * values appropriately for use with the Distributed Cache. WARNING: This will delete the contents of
         * dest before staging the archive.
         *
         * @param source    File or folder to copy to the file system. If it is a folder, all contents will be
         *                  copied into dest.
         * @param fs        Hadoop file system to store the contents of the archive in
         * @param dest      Destination to copy source into. If source is a file, the new file name will be
         *                  exactly dest. If source is a folder, its contents will be copied into dest. For more
         *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path,
         *                  org.apache.hadoop.fs.Path)}.
         * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be
         *                  thrown.
         * @throws IOException         Destination exists and is not a directory
         * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
         */
        public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException, KettleFileException {
            if (!source.exists()) {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.SourceDoesNotExist", source));
            }

            if (fs.exists(dest)) {
                if (overwrite) {
                    // It is a directory, clear it out
                    fs.delete(dest, true);
                } else {
                    throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                            "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
                }
            }

            // Use the same replication we'd use for submitting jobs
            short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

            copyFile(source, fs, dest, overwrite);
            fs.setReplication(dest, replication);
        }

        private void copyFile(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException {
            if (source.getType() == FileType.FOLDER) {
                fs.mkdirs(dest);
                fs.setPermission(dest, CACHED_FILE_PERMISSION);
                for (FileObject fileObject : source.getChildren()) {
                    copyFile(fileObject, fs, new Path(dest, fileObject.getName().getBaseName()), overwrite);
                }
            } else {
                try (FSDataOutputStream fsDataOutputStream = fs.create(dest, overwrite)) {
                    IOUtils.copy(source.getContent().getInputStream(), fsDataOutputStream);
                    fs.setPermission(dest, CACHED_FILE_PERMISSION);
                }
            }
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}
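
Taken together, the overrides above support a stage-then-register flow: copy a local artifact to the cluster with job-submission replication and 0755 permissions, then put it on the task classpath. A hypothetical driver sketch (the local jar path, the destination, and the util handle to the subclass configured above are all illustrative; it assumes stageForCache and addFileToClassPath are visible on the base type):

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.VFS;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public void stageAndRegister(DistributedCacheUtilImpl util, Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    // Resolve the local artifact via Apache VFS, as the shim code does.
    FileObject localJar = VFS.getManager().resolveFile("file:///opt/etl/custom.jar");
    Path dest = fs.makeQualified(new Path("/tmp/kettle/cache/custom.jar"));

    util.stageForCache(localJar, fs, dest, true); // overwrite any prior copy
    util.addFileToClassPath(dest, conf);          // registers the qualified URI
}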

From source file: org.pentaho.hadoop.shim.mapr31.MapR3DistributedCacheUtilImpl.java

License: Apache License

/**
 * Adds a file path to the current set of classpath entries. It adds the file
 * to the cache as well.
 *
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator
 * for the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 */
@Override
public void addFileToClassPath(Path file, Configuration conf) throws IOException {

    String classpath = conf.get("mapred.job.classpath.files");
    conf.set("mapred.job.classpath.files",
            classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
    FileSystem fs = FileSystem.get(conf);
    URI uri = fs.makeQualified(file).toUri();

    DistributedCache.addCacheFile(uri, conf);
}

From source file: org.qcri.pca.CompositeJob.java

/**
 * Computes XtX and YtX.
 *
 * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm
 *
 * XtX = (X - Xm)' * (X - Xm)
 *
 * YtX = (Y - Ym)' * (X - Xm)
 * 
 * @param conf
 *          the configuration
 * @param matrixInputPath
 *          Y
 * @param inMemMatrixDir
 *          MEM, where X = Y * MEM
 * @param inMemMatrixNumRows
 *          MEM.rows
 * @param inMemMatrixNumCols
 *          MEM.cols
 * @param ymPath
 *          Ym
 * @param xmPath
 *          Xm
 * @param matrixOutputPath
 *          YtX
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, int inMemMatrixNumRows,
        int inMemMatrixNumCols, String ymPath, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    conf.set(YMPATH, ymPath);
    conf.set(XMPATH, xmPath);
    Path xtxOutputPath = getXtXPathBasedOnYm(new Path(ymPath));
    conf.set(XTXPATH, xtxOutputPath.toString());
    Job job = new Job(conf);
    job.setJobName("CompositeJob-" + matrixInputPath.getName());
    job.setJarByClass(CompositeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(CompositeWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setSortComparatorClass(CompositeWritable.class);
    job.setGroupingComparatorClass(CompositeWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
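
Note that fs here is obtained from matrixInputPath.toUri() rather than from the default file system, so the two makeQualified calls pin both the input and output paths to the cluster that actually holds the data, even if the submitting JVM's fs.defaultFS points elsewhere.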