Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualifies a path to one that uses this FileSystem and, if the path is relative, makes it absolute.
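
For example, on a cluster whose default file system is HDFS, qualifying a relative path prepends the file system's scheme, authority, and current working directory. A minimal sketch (the namenode URI and user directory in the comments are illustrative, not part of the API):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A relative, scheme-less path...
        Path relative = new Path("data/input");

        // ...gains the FS scheme, authority, and working directory, e.g.
        // hdfs://namenode:8020/user/alice/data/input on a default setup.
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}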

Usage

From source file: org.notmysock.tez.BroadcastTest.java

License: Apache License

public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
    System.out.println("Running BroadcastTest");
    // conf and UGI
    TezConfiguration tezConf;
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
            + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = fs.makeQualified(stagingDir);

    Path jobJar = new Path(stagingDir, "job.jar");
    fs.copyFromLocalFile(getCurrentJarURL(), jobJar);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put("job.jar", createLocalResource(fs, jobJar));

    TezClient tezSession = null;
    // needs session or else TaskScheduler does not hold onto containers
    tezSession = TezClient.create("BroadcastTest", tezConf);
    tezSession.addAppMasterLocalFiles(localResources);
    tezSession.start();

    DAGClient dagClient = null;

    try {
        DAG dag = createDAG(fs, tezConf, stagingDir, localResources);

        dag.addTaskLocalFiles(localResources);

        tezSession.waitTillReady();
        dagClient = tezSession.submitDAG(dag);

        // monitoring
        DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
}

From source file: org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java

License: Apache License

/**
 * Adds a file path to the current set of classpath entries. It adds the file
 * to the cache as well.
 *
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator
 * for the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 */
public void addFileToClassPath(Path file, Configuration conf) throws IOException {

    // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around
    if (VersionInfo.getVersion().contains("0.21")) {
        DistributedCache.addFileToClassPath(file, conf);
    } else {
        String classpath = conf.get("mapred.job.classpath.files");
        conf.set("mapred.job.classpath.files",
                classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
        FileSystem fs = FileSystem.get(conf);
        URI uri = fs.makeQualified(file).toUri();

        DistributedCache.addCacheFile(uri, conf);
    }
}
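
The makeQualified call in this pattern matters because DistributedCache URIs travel to remote task JVMs, where a scheme-less path would be resolved against whatever file system the task treats as its default. Qualifying first pins the URI to the submitting FileSystem. A minimal sketch of the difference (the jar path and the namenode URI in the comment are hypothetical):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyBeforeCaching {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        Path bare = new Path("/tmp/lib/app.jar"); // hypothetical jar location
        URI bareUri = bare.toUri();               // no scheme or authority

        // The qualified form names the file system explicitly, e.g.
        // hdfs://namenode:8020/tmp/lib/app.jar on a default HDFS setup.
        URI qualifiedUri = fs.makeQualified(bare).toUri();

        System.out.println(bareUri);
        System.out.println(qualifiedUri);
    }
}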

From source file: org.pentaho.hadoop.shim.cdh50.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}
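
The overridden addFileToClassPath uses an append-or-initialize ternary to accumulate entries in the mapred.job.classpath.files property. A minimal sketch of just that accumulation, extracted for clarity (the jar paths are hypothetical):

import org.apache.hadoop.conf.Configuration;

public class ClasspathAppendSketch {
    // Mirrors the ternary used in addFileToClassPath above.
    static void append(Configuration conf, String entry, String separator) {
        String classpath = conf.get("mapred.job.classpath.files");
        conf.set("mapred.job.classpath.files",
                classpath == null ? entry : classpath + separator + entry);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        append(conf, "/cache/a.jar", ","); // property unset: initialized
        append(conf, "/cache/b.jar", ","); // appended with the separator
        System.out.println(conf.get("mapred.job.classpath.files"));
        // prints: /cache/a.jar,/cache/b.jar
    }
}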

From source file: org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java

License: Apache License

/**
 * Adds a file path to the current set of classpath entries. It adds the file to the cache as well.
 * <p/>
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator for
 * the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 */
public void addFileToClassPath(Path file, Configuration conf) throws IOException {

    // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around

    // Save off the classloader, to make sure the version info can be loaded successfully from the hadoop-common JAR
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(VersionInfo.class.getClassLoader());

    // Get the version string or set to a default value
    String version;
    try {
        version = VersionInfo.getVersion();
    } catch (Throwable t) {
        version = "unknown";
    }

    // Restore the original classloader
    Thread.currentThread().setContextClassLoader(cl);

    String classpath = conf.get("mapred.job.classpath.files");
    conf.set("mapred.job.classpath.files",
            classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
    FileSystem fs = FileSystem.get(conf);
    URI uri = fs.makeQualified(file).toUri();

    DistributedCache.addCacheFile(uri, conf);
}

From source file: org.pentaho.hadoop.shim.common.HadoopShimImpl.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    validateHadoopHomeWithWinutils();
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(file.toUri(), conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}

From source file: org.pentaho.hadoop.shim.emr32.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();
            //Job.getInstance( conf ).addCacheFile( uri );
            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }

    });
    if (!fsm.hasProvider("s3n")) {
        fsm.addProvider(config, "s3n", config.getIdentifier(), new HDFSFileProvider());
    }

}

From source file: org.pentaho.hadoop.shim.hdp20.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}

From source file: org.pentaho.hadoop.shim.hsp101.HadoopShim.java

License: Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {
        /**
         * Default permission for cached files
         * <p/>
         * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
         */
        private final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755);

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        /**
         * Stages the source file or folder to a Hadoop file system and sets its permission and replication
         * values appropriately for use with the Distributed Cache. WARNING: This will delete the contents of
         * dest before staging the archive.
         *
         * @param source    File or folder to copy to the file system. If it is a folder, all contents will be
         *                  copied into dest.
         * @param fs        Hadoop file system to store the contents of the archive in
         * @param dest      Destination to copy source into. If source is a file, the new file name will be
         *                  exactly dest. If source is a folder, its contents will be copied into dest. For more
         *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path,
         *                  org.apache.hadoop.fs.Path)}.
         * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be
         *                  thrown.
         * @throws IOException         Destination exists and is not a directory
         * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
         */
        public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException, KettleFileException {
            if (!source.exists()) {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.SourceDoesNotExist", source));
            }

            if (fs.exists(dest)) {
                if (overwrite) {
                    // It is a directory, clear it out
                    fs.delete(dest, true);
                } else {
                    throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                            "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
                }
            }

            // Use the same replication we'd use for submitting jobs
            short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

            copyFile(source, fs, dest, overwrite);
            fs.setReplication(dest, replication);
        }

        private void copyFile(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException {
            if (source.getType() == FileType.FOLDER) {
                fs.mkdirs(dest);
                fs.setPermission(dest, CACHED_FILE_PERMISSION);
                for (FileObject fileObject : source.getChildren()) {
                    copyFile(fileObject, fs, new Path(dest, fileObject.getName().getBaseName()), overwrite);
                }
            } else {
                try (FSDataOutputStream fsDataOutputStream = fs.create(dest, overwrite)) {
                    IOUtils.copy(source.getContent().getInputStream(), fsDataOutputStream);
                    fs.setPermission(dest, CACHED_FILE_PERMISSION);
                }
            }
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}
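
Taken together, the overrides above support a stage-then-register flow: copy a local artifact to the cluster with job-submission replication and 0755 permissions, then put it on the task classpath. A hypothetical driver sketch (the local jar path, the destination, and the util handle to the subclass configured above are all illustrative; it assumes stageForCache and addFileToClassPath are visible on the base type):

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.VFS;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public void stageAndRegister(DistributedCacheUtilImpl util, Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    // Resolve the local artifact via Apache VFS, as the shim code does.
    FileObject localJar = VFS.getManager().resolveFile("file:///opt/etl/custom.jar");
    Path dest = fs.makeQualified(new Path("/tmp/kettle/cache/custom.jar"));

    util.stageForCache(localJar, fs, dest, true); // overwrite any prior copy
    util.addFileToClassPath(dest, conf);          // registers the qualified URI
}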

From source file: org.pentaho.hadoop.shim.mapr31.MapR3DistributedCacheUtilImpl.java

License: Apache License

/**
 * Adds a file path to the current set of classpath entries. It adds the file
 * to the cache as well.
 *
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator
 * for the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 */
@Override
public void addFileToClassPath(Path file, Configuration conf) throws IOException {

    String classpath = conf.get("mapred.job.classpath.files");
    conf.set("mapred.job.classpath.files",
            classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
    FileSystem fs = FileSystem.get(conf);
    URI uri = fs.makeQualified(file).toUri();

    DistributedCache.addCacheFile(uri, conf);
}

From source file: org.qcri.pca.CompositeJob.java

/**
 * Computes XtX and YtX.
 *
 * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm
 *
 * XtX = (X - Xm)' * (X - Xm)
 *
 * YtX = (Y - Ym)' * (X - Xm)
 * 
 * @param conf
 *          the configuration
 * @param matrixInputPath
 *          Y
 * @param inMemMatrixDir
 *          MEM, where X = Y * MEM
 * @param inMemMatrixNumRows
 *          MEM.rows
 * @param inMemMatrixNumCols
 *          MEM.cols
 * @param ymPath
 *          Ym
 * @param xmPath
 *          Xm
 * @param matrixOutputPath
 *          YtX
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, int inMemMatrixNumRows,
        int inMemMatrixNumCols, String ymPath, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    conf.set(YMPATH, ymPath);
    conf.set(XMPATH, xmPath);
    Path xtxOutputPath = getXtXPathBasedOnYm(new Path(ymPath));
    conf.set(XTXPATH, xtxOutputPath.toString());
    Job job = new Job(conf);
    job.setJobName("CompositeJob-" + matrixInputPath.getName());
    job.setJarByClass(CompositeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(CompositeWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setSortComparatorClass(CompositeWritable.class);
    job.setGroupingComparatorClass(CompositeWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
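
Note that fs here is obtained from matrixInputPath.toUri() rather than from the default file system, so the two makeQualified calls pin both the input and output paths to the cluster that actually holds the data, even if the submitting JVM's fs.defaultFS points elsewhere.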