Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

This page shows example usage of the org.apache.hadoop.fs.FileSystem method makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualifies a path to one which uses this FileSystem and, if the path is relative, makes it absolute.
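
As a minimal sketch of that behavior (assuming only a stock Hadoop client on the classpath; the class name and paths are illustrative, and the printed URI depends on fs.defaultFS and the working directory):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        // The local file system; on a real cluster this would typically be HDFS.
        FileSystem localFs = FileSystem.getLocal(new Configuration());

        // A relative path: no scheme, no authority.
        Path relative = new Path("data/input.txt");

        // makeQualified resolves the path against the file system's working
        // directory and stamps it with the file system's scheme, yielding
        // something like file:/home/user/data/input.txt locally, or
        // hdfs://namenode:8020/user/alice/data/input.txt on HDFS.
        Path qualified = localFs.makeQualified(relative);

        System.out.println(qualified);
    }
}

An already-qualified path comes back unchanged, which is why the examples below can call makeQualified unconditionally before passing paths to other processes or machines.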

Usage

From source file: org.apache.tez.runtime.TestReflectionUtils.java

License: Apache License

@Test
public void testAddResourceToClasspath() throws IOException, TezException {

    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);

    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);

        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}

From source file: org.apache.tez.runtime.TestRuntimeUtils.java

License: Apache License

@Test
public void testAddResourceToClasspath() throws IOException, TezException {

    String rsrcName = "dummyfile.xml";
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path p = new Path(rsrcName);
    p = localFs.makeQualified(p);

    localFs.delete(p, false);

    try {
        URL loadedUrl = null;

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);
        assertNull(loadedUrl);

        // Add parent to classpath since we're not adding a jar
        assertTrue(localFs.createNewFile(p));
        String urlForm = p.toUri().toURL().toString();
        urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1);
        URL url = new URL(urlForm);

        RuntimeUtils.addResourcesToClasspath(Collections.singletonList(url));

        loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName);

        assertNotNull(loadedUrl);
    } finally {
        localFs.delete(p, false);
    }
}

From source file: org.apache.tez.test.FaultToleranceTestRunner.java

License: Apache License

void setup() throws Exception {
    TezConfiguration tezConf = null;
    if (conf == null) {
        tezConf = new TezConfiguration(new YarnConfiguration());
    } else {
        tezConf = new TezConfiguration(new YarnConfiguration(this.conf));
    }
    FileSystem defaultFs = FileSystem.get(tezConf);

    Path remoteStagingDir = defaultFs
            .makeQualified(new Path(TEST_ROOT_DIR, String.valueOf(new Random().nextInt(100000))));
    TezClientUtils.ensureStagingDirExists(tezConf, remoteStagingDir);

    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

    tezSession = TezClient.create("FaultToleranceTestRunner", tezConf);
    tezSession.start();
}

From source file: org.apache.tez.test.MiniTezCluster.java

License: Apache License

@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // Use libs from cluster since no build is available
    conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true);
    // blacklisting disabled to prevent scheduling issues
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
        conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath());
    }

    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
        // nothing defined. set quick delete value
        conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    File appJarLocalFile = new File(MiniTezCluster.APPJAR);

    if (!appJarLocalFile.exists()) {
        String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting.";
        LOG.info(message);
        throw new TezUncheckedException(message);
    } else {
        LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath());
    }

    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar");
    // Copy AppJar and make it public.
    Path appMasterJar = new Path(MiniTezCluster.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appRemoteJar);
    fs.setPermission(appRemoteJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // VMEM monitoring disabled, PMEM monitoring enabled.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingPath = FileContext.getFileContext(conf)
                .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * Re-configure the staging path on Windows if the file system is localFs.
         * We need to use an absolute path that contains the drive letter. The unit
         * test could run on a different drive than the AM. We can run into the
         * issue that job files are localized to the drive the test runs on,
         * while the AM starts on a different drive and fails to find the job
         * metafiles. Using an absolute path avoids this ambiguity.
         */
        if (Path.WINDOWS) {
            if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
                conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                        new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
            }
        }
        FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
        if (fc.util().exists(stagingPath)) {
            LOG.info(stagingPath + " exists! deleting...");
            fc.delete(stagingPath, true);
        }
        LOG.info("mkdir: " + stagingPath);
        fc.mkdir(stagingPath, null, true);

        //mkdir done directory as well
        String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        Path doneDirPath = fc.makeQualified(new Path(doneDir));
        fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
        throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    //configure the shuffle service in NM
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);

    // Non-standard shuffle port
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
            ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
}

From source file: org.apache.tez.test.TestFaultTolerance.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    LOG.info("Starting mini clusters");
    FileSystem remoteFs = null;
    try {
        conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
        dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).racks(null).build();
        remoteFs = dfsCluster.getFileSystem();
    } catch (IOException io) {
        throw new RuntimeException("problem starting mini dfs cluster", io);
    }
    if (miniTezCluster == null) {
        miniTezCluster = new MiniTezCluster(TestFaultTolerance.class.getName(), 4, 1, 1);
        Configuration miniTezconf = new Configuration(conf);
        miniTezconf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
        miniTezCluster.init(miniTezconf);
        miniTezCluster.start();

        Path remoteStagingDir = remoteFs
                .makeQualified(new Path(TEST_ROOT_DIR, String.valueOf(new Random().nextInt(100000))));
        TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);

        TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
        tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());
        tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

        tezSession = TezClient.create("TestFaultTolerance", tezConf, true);
        tezSession.start();
    }
}

From source file: org.apache.tez.tests.MiniTezClusterWithTimeline.java

License: Apache License

@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // Use libs from cluster since no build is available
    conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true);
    // blacklisting disabled to prevent scheduling issues
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
        conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath());
    }

    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
        // nothing defined. set quick delete value
        conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    File appJarLocalFile = new File(MiniTezClusterWithTimeline.APPJAR);

    if (!appJarLocalFile.exists()) {
        String message = "TezAppJar " + MiniTezClusterWithTimeline.APPJAR + " not found. Exiting.";
        LOG.info(message);
        throw new TezUncheckedException(message);
    } else {
        LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath());
    }

    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar");
    // Copy AppJar and make it public.
    Path appMasterJar = new Path(MiniTezClusterWithTimeline.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appRemoteJar);
    fs.setPermission(appRemoteJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // VMEM monitoring disabled, PMEM monitoring enabled.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingPath = FileContext.getFileContext(conf)
                .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * Re-configure the staging path on Windows if the file system is localFs.
         * We need to use an absolute path that contains the drive letter. The unit
         * test could run on a different drive than the AM. We can run into the
         * issue that job files are localized to the drive the test runs on,
         * while the AM starts on a different drive and fails to find the job
         * metafiles. Using an absolute path avoids this ambiguity.
         */
        if (Path.WINDOWS) {
            if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
                conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                        new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
            }
        }
        FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
        if (fc.util().exists(stagingPath)) {
            LOG.info(stagingPath + " exists! deleting...");
            fc.delete(stagingPath, true);
        }
        LOG.info("mkdir: " + stagingPath);
        fc.mkdir(stagingPath, null, true);

        //mkdir done directory as well
        String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        Path doneDirPath = fc.makeQualified(new Path(doneDir));
        fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
        throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    //configure the shuffle service in NM
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);

    // Non-standard shuffle port
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
            ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
}

From source file: org.cloudata.core.parallel.hadoop.CloudataMapReduceUtil.java

License: Apache License

/**
 * Configures the classpath to run a MapReduce job which uses Cloudata:<BR>
 * 1. Uploads the Cloudata library (cloudata-xxx-core.jar) to HDFS<BR>
 * 2. Builds cloudata-configure.jar, which includes cloudata-site.xml, and uploads it to HDFS<BR>
 * 3. Runs DistributedCache.addArchiveToClassPath() for each file<BR>
 * clearMapReduce() must be called after the job has run.
 * @param jobConf the job configuration to set up
 * @return the temporary library directory in HDFS; pass this value to clearMapReduce().
 * @throws IOException
 */
public static String initMapReduce(JobConf jobConf) throws IOException {

    String jarPath = "Cloudata_Lib_" + System.currentTimeMillis();

    // 'conf' is not defined in this excerpt; it comes from the enclosing class.
    FileSystem fs = FileSystem.get(conf);

    Path parentPath = fs.makeQualified(new Path(jarPath));
    //upload jar to dfs
    fs.mkdirs(parentPath);

    String[] classpaths = System.getProperty("java.class.path", "").split(File.pathSeparator);

    if (classpaths == null || classpaths.length == 0) {
        throw new IOException("No classpath");
    }

    List<String> uploadedFiles = new ArrayList<String>();
    for (String eachPath : classpaths) {
        if (eachPath.indexOf("cloudata") >= 0 || eachPath.indexOf("lib") >= 0
                || eachPath.indexOf("conf") >= 0) {
            uploadFile(fs, parentPath, new File(eachPath), uploadedFiles);
        }
    }

    if (uploadedFiles.size() == 0) {
        throw new IOException("No lib files[cloudata-xxx-core.jar, zookeeper-xxx.jar] in classpath");
    }

    Path rootPath = new Path("/");
    String rootUri = fs.makeQualified(rootPath).toString();

    for (String eachPath : uploadedFiles) {
        Path path = fs.makeQualified(new Path(eachPath));
        String pathStr = path.toUri().toString();
        if (pathStr.indexOf(rootUri) >= 0) {
            pathStr = pathStr.substring(pathStr.indexOf(rootUri) + rootUri.length());
        }
        if (!pathStr.startsWith("/")) {
            pathStr = "/" + pathStr;
        }
        LOG.debug("DistributedCache.addArchiveToClassPath: " + pathStr);
        DistributedCache.addArchiveToClassPath(new Path(pathStr), jobConf);
    }
    return jarPath;
}
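
Note the string handling near the end of initMapReduce: makeQualified is called once on each uploaded file and once on the root path "/" to obtain the file system's root URI, so the scheme and authority can be stripped off again. This presumably leaves the bare absolute path form that DistributedCache.addArchiveToClassPath stores in the job configuration.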

From source file: org.deeplearning4j.iterativereduce.runtime.Utils.java

License: Apache License

private static void copyToFs(Configuration conf, String local, String remote) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(local);
    Path dst = fs.makeQualified(new Path(remote));

    LOG.debug("Copying to filesystem, src=" + src.toString() + ", dst=" + dst);

    fs.copyFromLocalFile(false, true, src, dst);
}

From source file: org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java

License: Apache License

@Override
public void start(FileSystem fs, Path appRoot) {
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(SparkStreamingJobMain.class);
    String containingJar = jobConf.getJar();

    Path libPath = new Path(appRoot, "lib");

    Path jarPath = new Path(libPath, new File(containingJar).getName());
    jarPath = fs.makeQualified(jarPath);

    SparkLauncher launcher = new SparkLauncher();

    launcher.setMainClass(SparkStreamingJobMain.class.getName());

    launcher.setAppResource(jarPath.toString());

    launcher.setMaster("yarn-cluster");

    try {
        // Add the library JARs from HDFS so we don't need to reload
        // them separately into Spark.
        FileStatus[] libJars = fs.listStatus(libPath);

        for (FileStatus jar : libJars) {

            launcher.addJar(jar.getPath().toString());
        }

        // Add the sharelib JARs, since they are not visible to Spark otherwise.
        List<Path> shareLibJars = ShareLibs.jars(sparkJobContext.getHadoopConf(), "hive2");

        for (Path sharelibJar : shareLibJars) {

            launcher.addJar(fs.makeQualified(sharelibJar).toString());
        }

    } catch (IOException e) {
        throw new AppException(e);
    }

    launcher.addAppArgs(appRoot.toString(), description.getJobName());

    // Explicitly set the metastore URI to be usable in the job.
    launcher.setConf("spark.hadoop.hive.metastore.uris",
            sparkJobContext.getHadoopConf().get("hive.metastore.uris"));

    // Add the Avro classes.
    List<Schema> schemas = JobReflection.getSchemas(job);
    StringBuilder avroClassesArg = new StringBuilder();

    avroClassesArg.append("-D").append(KryoAvroRegistrator.KITE_AVRO_CLASSES).append("=");

    boolean first = true;

    for (Schema schema : schemas) {

        if (!first) {
            avroClassesArg.append(",");
        }

        avroClassesArg.append(SpecificData.get().getClass(schema).getName());

        first = false;
    }

    launcher.setConf("spark.driver.extraJavaOptions", avroClassesArg.toString());
    launcher.setConf("spark.executor.extraJavaOptions", avroClassesArg.toString());

    try {

        Process process = launcher.launch();

        // Redirect the spark-submit output to be visible to the reader.
        Thread stdoutThread = writeOutput(process.getInputStream(), System.out);
        Thread stderrThread = writeOutput(process.getErrorStream(), System.err);

        int result = process.waitFor();

        stdoutThread.join();
        stderrThread.join();

        if (result != 0) {
            throw new AppException("spark-submit returned error status: " + result);
        }

    } catch (IOException e) {
        throw new AppException(e);
    } catch (InterruptedException e) {
        throw new AppException(e);
    }
}

From source file: org.kitesdk.cli.commands.CSVImportCommand.java

License: Apache License

@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "CSV path and target dataset name are required.");

    Path source = qualifiedPath(targets.get(0));
    FileSystem sourceFS = source.getFileSystem(getConf());
    Preconditions.checkArgument(sourceFS.exists(source), "CSV path does not exist: " + source);

    if (header != null) {
        // if a header is given on the command line, don't assume one is in the file
        noHeader = true;
    }

    CSVProperties props = new CSVProperties.Builder().delimiter(delimiter).escape(escape).quote(quote)
            .header(header).hasHeader(!noHeader).linesToSkip(linesToSkip).charset(charsetName).build();

    String dataset = targets.get(1);

    View<Record> target = load(dataset, Record.class);
    Schema datasetSchema = target.getDataset().getDescriptor().getSchema();

    // TODO: replace this with a temporary Dataset from a FS repo
    // TODO: CDK-92: always use GenericRecord?

    DatasetDescriptor csvDescriptor = new DatasetDescriptor.Builder().location(source.toUri())
            .schema(ColumnMappingParser
                    .removeEmbeddedMapping(PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
            .format("csv").build();
    csvDescriptor = props.addToDescriptor(csvDescriptor);

    TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(),
            // ensure the same FS as the file source is used
            sourceFS.makeQualified(new Path("/tmp/" + UUID.randomUUID().toString())),
            target.getDataset().getNamespace(), UUID.randomUUID().toString());

    try {
        FileSystemDataset<Record> csvDataset = (FileSystemDataset) repo.create("default", "csv", csvDescriptor);

        Iterator<Path> iter = csvDataset.pathIterator().iterator();
        Preconditions.checkArgument(iter.hasNext(), "CSV path has no data files: " + source);

        TaskUtil.configure(getConf()).addJars(jars);

        TransformTask task;
        if (transform != null) {
            DoFn<Record, Record> transformFn;
            try {
                DynConstructors.Ctor<DoFn<Record, Record>> ctor = new DynConstructors.Builder(DoFn.class)
                        .loader(loaderForJars(jars)).impl(transform).buildChecked();
                transformFn = ctor.newInstance();
            } catch (NoSuchMethodException e) {
                throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e);
            }
            task = new TransformTask<Record, Record>(csvDataset, target, transformFn);
        } else {
            task = new CopyTask<Record>(csvDataset, target);
        }

        task.setConf(getConf());

        if (noCompaction) {
            task.noCompaction();
        }

        if (numWriters >= 0) {
            task.setNumWriters(numWriters);
        }

        PipelineResult result = task.run();

        if (result.succeeded()) {
            long count = task.getCount();
            if (count > 0) {
                console.info("Added {} records to \"{}\"", count, dataset);
            }
            return 0;
        } else {
            return 1;
        }
    } finally {
        // clean up the temporary repository
        repo.delete();
    }
}