Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.makeQualified.

Prototype

public Path makeQualified(Path path)

Source Link

Document

Qualifies a path so that it uses this FileSystem and, if the path is relative, makes it absolute.

Usage

From source file:org.apache.tez.runtime.TestReflectionUtils.java

License:Apache License

/**
 * Checks that a file becomes visible through the context class loader after
 * the directory containing it is passed to
 * {@code ReflectionUtils.addResourcesToClasspath}.
 */
@Test
public void testAddResourceToClasspath() throws IOException, TezException {
    final String resourceName = "dummyfile.xml";
    final FileSystem fs = FileSystem.getLocal(new Configuration());
    final Path resourcePath = fs.makeQualified(new Path(resourceName));

    // Remove any copy of the file before the checks begin.
    fs.delete(resourcePath, false);

    try {
        // Before anything is added, the resource must not be resolvable.
        assertNull(Thread.currentThread().getContextClassLoader().getResource(resourceName));

        assertTrue(fs.createNewFile(resourcePath));

        // The classpath entry is the parent directory (trailing '/' kept),
        // because a plain file rather than a jar is being added.
        final String fileUrl = resourcePath.toUri().toURL().toString();
        final int lastSlash = fileUrl.lastIndexOf('/');
        final URL parentDirUrl = new URL(fileUrl.substring(0, lastSlash + 1));

        ReflectionUtils.addResourcesToClasspath(Collections.singletonList(parentDirUrl));

        assertNotNull(Thread.currentThread().getContextClassLoader().getResource(resourceName));
    } finally {
        // Always remove the file created for the test.
        fs.delete(resourcePath, false);
    }
}

From source file:org.apache.tez.runtime.TestRuntimeUtils.java

License:Apache License

/**
 * Checks that a file becomes visible through the context class loader after
 * the directory containing it is passed to
 * {@code RuntimeUtils.addResourcesToClasspath}.
 */
@Test
public void testAddResourceToClasspath() throws IOException, TezException {
    final String fileName = "dummyfile.xml";
    final FileSystem localFileSystem = FileSystem.getLocal(new Configuration());
    final Path qualifiedFile = localFileSystem.makeQualified(new Path(fileName));

    // Remove any copy of the file before the checks begin.
    localFileSystem.delete(qualifiedFile, false);

    try {
        // Before anything is added, the resource must not be resolvable.
        URL resolved = Thread.currentThread().getContextClassLoader().getResource(fileName);
        assertNull(resolved);

        assertTrue(localFileSystem.createNewFile(qualifiedFile));

        // Cut the URL back to the parent directory (trailing '/' kept),
        // because a plain file rather than a jar is being added.
        String asUrl = qualifiedFile.toUri().toURL().toString();
        String parentUrl = asUrl.substring(0, asUrl.lastIndexOf('/') + 1);

        RuntimeUtils.addResourcesToClasspath(Collections.singletonList(new URL(parentUrl)));

        resolved = Thread.currentThread().getContextClassLoader().getResource(fileName);
        assertNotNull(resolved);
    } finally {
        // Always remove the file created for the test.
        localFileSystem.delete(qualifiedFile, false);
    }
}

From source file:org.apache.tez.test.FaultToleranceTestRunner.java

License:Apache License

/**
 * Builds the Tez configuration, ensures a randomly named staging directory
 * exists under {@code TEST_ROOT_DIR}, and starts the Tez session.
 *
 * @throws Exception if the file system, staging directory or session cannot be set up
 */
void setup() throws Exception {
    // Wrap the runner's configuration when one was supplied, otherwise use YARN defaults.
    final YarnConfiguration yarnConf = (conf == null)
            ? new YarnConfiguration()
            : new YarnConfiguration(this.conf);
    final TezConfiguration tezConf = new TezConfiguration(yarnConf);

    final FileSystem fs = FileSystem.get(tezConf);

    // A random child directory is used as the staging dir.
    final String randomChild = String.valueOf(new Random().nextInt(100000));
    final Path stagingDir = fs.makeQualified(new Path(TEST_ROOT_DIR, randomChild));
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

    tezSession = TezClient.create("FaultToleranceTestRunner", tezConf);
    tezSession.start();
}

From source file:org.apache.tez.test.MiniTezCluster.java

License:Apache License

/**
 * Populates {@code conf} for the mini Tez cluster and then delegates to the
 * superclass initialisation.
 *
 * <p>In order, this method selects the YARN-Tez framework, fills in defaults
 * for the staging directory and the NodeManager delete delay, uploads the Tez
 * application jar and publishes it through {@code TEZ_LIB_URIS}, recreates the
 * staging directory, creates the history "done" directory, and registers the
 * shuffle handler and container executor.
 *
 * @param conf configuration to populate; it is modified in place
 * @throws TezUncheckedException if the application jar does not exist locally
 *         or the staging directories cannot be created
 * @throws Exception if any other initialisation step fails
 */
@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // No build is available, so the Hadoop libs from the cluster are used.
    conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true);
    // Node blacklisting is turned off to prevent scheduling issues.
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

    if (null == conf.get(MRJobConfig.MR_AM_STAGING_DIR)) {
        File defaultStagingDir = new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR);
        conf.set(MRJobConfig.MR_AM_STAGING_DIR, defaultStagingDir.getAbsolutePath());
    }

    if (null == conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC)) {
        // Nothing configured: use a zero delay so deletion is quick.
        conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    File localAppJar = new File(MiniTezCluster.APPJAR);
    if (localAppJar.exists()) {
        LOG.info("Using Tez AppJar: " + localAppJar.getAbsolutePath());
    } else {
        String notFound = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting.";
        LOG.info(notFound);
        throw new TezUncheckedException(notFound);
    }

    FileSystem clusterFs = FileSystem.get(conf);
    Path rootDir = clusterFs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path remoteAppJar = new Path(rootDir, "TezAppJar.jar");
    // Upload the app jar and open its permissions so that it is public.
    clusterFs.copyFromLocalFile(new Path(MiniTezCluster.APPJAR), remoteAppJar);
    clusterFs.setPermission(remoteAppJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, remoteAppJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // Both the virtual-memory and the physical-memory checks are set to false.
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingDir = FileContext.getFileContext(conf)
                .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * On Windows with a local file system, re-configure the staging path
         * as an absolute path that contains the drive letter. The unit test
         * could run on a different drive than the AM: job files would then be
         * localized to the test's drive while the AM starts on another drive
         * and fails to find the job metafiles. An absolute path avoids this
         * ambiguity.
         */
        if (Path.WINDOWS && stagingDir.getFileSystem(conf) instanceof LocalFileSystem) {
            File configured = new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR));
            conf.set(MRJobConfig.MR_AM_STAGING_DIR, configured.getAbsolutePath());
        }

        FileContext stagingContext = FileContext.getFileContext(stagingDir.toUri(), conf);
        // Start from an empty staging directory: drop whatever is already there.
        if (stagingContext.util().exists(stagingDir)) {
            LOG.info(stagingDir + " exists! deleting...");
            stagingContext.delete(stagingDir, true);
        }
        LOG.info("mkdir: " + stagingDir);
        stagingContext.mkdir(stagingDir, null, true);

        // The history server "done" directory is created as well.
        String doneDirPrefix = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        stagingContext.mkdir(stagingContext.makeQualified(new Path(doneDirPrefix)), null, true);
    } catch (IOException e) {
        throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    // Register the shuffle handler as a NodeManager auxiliary service.
    final String shuffleServiceId = ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID;
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] { shuffleServiceId });
    conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, shuffleServiceId),
            ShuffleHandler.class, Service.class);

    // Non-standard shuffle port.
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
            ContainerExecutor.class);

    // Uber-task mode is switched off; only non-uberized operation is exercised here.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
}

From source file:org.apache.tez.test.TestFaultTolerance.java

License:Apache License

/**
 * Starts a single-datanode mini DFS cluster and, if it has not been created
 * yet, a mini Tez cluster on top of it together with a Tez session.
 *
 * @throws RuntimeException if the mini DFS cluster cannot be started
 * @throws Exception if the Tez cluster, staging directory or session cannot be set up
 */
@BeforeClass
public static void setup() throws Exception {
    LOG.info("Starting mini clusters");

    final FileSystem hdfs;
    try {
        conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
        dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).racks(null).build();
        hdfs = dfsCluster.getFileSystem();
    } catch (IOException io) {
        throw new RuntimeException("problem starting mini dfs cluster", io);
    }

    // The Tez cluster and session are only created once.
    if (miniTezCluster != null) {
        return;
    }

    miniTezCluster = new MiniTezCluster(TestFaultTolerance.class.getName(), 4, 1, 1);
    final Configuration clusterConf = new Configuration(conf);
    // Point the mini Tez cluster at the mini DFS cluster (use HDFS).
    clusterConf.set("fs.defaultFS", hdfs.getUri().toString());
    miniTezCluster.init(clusterConf);
    miniTezCluster.start();

    // A random child directory of the test root is used as the staging dir.
    final String randomChild = String.valueOf(new Random().nextInt(100000));
    final Path stagingDir = hdfs.makeQualified(new Path(TEST_ROOT_DIR, randomChild));
    TezClientUtils.ensureStagingDirExists(conf, stagingDir);

    final TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

    tezSession = TezClient.create("TestFaultTolerance", tezConf, true);
    tezSession.start();
}

From source file:org.apache.tez.tests.MiniTezClusterWithTimeline.java

License:Apache License

/**
 * Populates {@code conf} for the mini Tez cluster and then delegates to the
 * superclass initialisation.
 *
 * <p>In order, this method selects the YARN-Tez framework, fills in defaults
 * for the staging directory and the NodeManager delete delay, uploads the Tez
 * application jar and publishes it through {@code TEZ_LIB_URIS}, recreates the
 * staging directory, creates the history "done" directory, and registers the
 * shuffle handler and container executor.
 *
 * @param conf configuration to populate; it is modified in place
 * @throws TezUncheckedException if the application jar does not exist locally
 *         or the staging directories cannot be created
 * @throws Exception if any other initialisation step fails
 */
@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // No build is available, so the Hadoop libs from the cluster are used.
    conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true);
    // Node blacklisting is turned off to prevent scheduling issues.
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

    final boolean stagingDirUnset = conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null;
    if (stagingDirUnset) {
        File fallbackStagingDir = new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR);
        conf.set(MRJobConfig.MR_AM_STAGING_DIR, fallbackStagingDir.getAbsolutePath());
    }

    final boolean deleteDelayUnset = conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null;
    if (deleteDelayUnset) {
        // Nothing configured: use a zero delay so deletion is quick.
        conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    final File appJarOnDisk = new File(MiniTezClusterWithTimeline.APPJAR);
    if (appJarOnDisk.exists()) {
        LOG.info("Using Tez AppJar: " + appJarOnDisk.getAbsolutePath());
    } else {
        final String notFound = "TezAppJar " + MiniTezClusterWithTimeline.APPJAR + " not found. Exiting.";
        LOG.info(notFound);
        throw new TezUncheckedException(notFound);
    }

    final FileSystem targetFs = FileSystem.get(conf);
    final Path tmpRoot = targetFs.makeQualified(new Path("target", getName() + "-tmpDir"));
    final Path uploadedJar = new Path(tmpRoot, "TezAppJar.jar");
    // Upload the app jar and open its permissions so that it is public.
    targetFs.copyFromLocalFile(new Path(MiniTezClusterWithTimeline.APPJAR), uploadedJar);
    targetFs.setPermission(uploadedJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, uploadedJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // Both the virtual-memory and the physical-memory checks are set to false.
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        final Path qualifiedStaging = FileContext.getFileContext(conf)
                .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * On Windows with a local file system, re-configure the staging path
         * as an absolute path that contains the drive letter. The unit test
         * could run on a different drive than the AM: job files would then be
         * localized to the test's drive while the AM starts on another drive
         * and fails to find the job metafiles. An absolute path avoids this
         * ambiguity.
         */
        if (Path.WINDOWS && qualifiedStaging.getFileSystem(conf) instanceof LocalFileSystem) {
            final File configuredDir = new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR));
            conf.set(MRJobConfig.MR_AM_STAGING_DIR, configuredDir.getAbsolutePath());
        }

        final FileContext context = FileContext.getFileContext(qualifiedStaging.toUri(), conf);
        // Start from an empty staging directory: drop whatever is already there.
        if (context.util().exists(qualifiedStaging)) {
            LOG.info(qualifiedStaging + " exists! deleting...");
            context.delete(qualifiedStaging, true);
        }
        LOG.info("mkdir: " + qualifiedStaging);
        context.mkdir(qualifiedStaging, null, true);

        // The history server "done" directory is created as well.
        final String donePrefix = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        final Path qualifiedDoneDir = context.makeQualified(new Path(donePrefix));
        context.mkdir(qualifiedDoneDir, null, true);
    } catch (IOException e) {
        throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    // Register the shuffle handler as a NodeManager auxiliary service.
    final String auxServiceKey = String.format(YarnConfiguration.NM_AUX_SERVICE_FMT,
            ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID);
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(auxServiceKey, ShuffleHandler.class, Service.class);

    // Non-standard shuffle port.
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
            ContainerExecutor.class);

    // Uber-task mode is switched off; only non-uberized operation is exercised here.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
}

From source file:org.cloudata.core.parallel.hadoop.CloudataMapReduceUtil.java

License:Apache License

/**
 * Configure classpath to run MapReduce job which uses Cloudata<BR>
 * 1. Uploads cloudata library to HDFS: cloudata-xxx-core.jar<BR>
 * 2. make cloudata-configure.jar includes cloudata-site.xml and uploads to HDFS<BR>
 * 3. run DistributedCache.addArchiveToClassPath() for each file<BR>
 * Must call clearMapReduce() after running job.
 *
 * <p>Only classpath entries whose path contains "cloudata", "lib" or "conf"
 * are handed to the upload step. The classpath is validated before anything
 * is created on the file system, and the temporary directory is removed again
 * when no file was uploaded, so a failed call leaves nothing behind.
 *
 * @param jobConf job configuration that receives the distributed-cache classpath entries
 * @return temporary directory for library in HDFS. When calling clearMapReduce(), use this value.
 * @throws IOException if the JVM classpath is empty, if no matching library
 *         file was uploaded, or if a file system operation fails
 */
public static String initMapReduce(JobConf jobConf) throws IOException {
    // Validate first: "".split(...) yields a single empty element, so the
    // array length alone cannot detect an empty classpath.
    String classPathProperty = System.getProperty("java.class.path", "");
    String[] classpaths = classPathProperty.split(File.pathSeparator);
    if (classPathProperty.isEmpty() || classpaths.length == 0) {
        throw new IOException("No classpath");
    }

    String jarPath = "Cloudata_Lib_" + System.currentTimeMillis();

    FileSystem fs = FileSystem.get(conf);

    Path parentPath = fs.makeQualified(new Path(jarPath));
    // Directory that receives the uploaded jars.
    fs.mkdirs(parentPath);

    List<String> uploadedFiles = new ArrayList<String>();
    for (String eachPath : classpaths) {
        if (eachPath.contains("cloudata") || eachPath.contains("lib") || eachPath.contains("conf")) {
            uploadFile(fs, parentPath, new File(eachPath), uploadedFiles);
        }
    }

    if (uploadedFiles.isEmpty()) {
        // The caller never learns the directory name on failure, so it could
        // not clean up; remove the (still empty) directory here.
        fs.delete(parentPath, true);
        throw new IOException("No lib files[cloudata-xxx-core.jar, zookeeper-xxx.jar] in classpath");
    }

    String rootUri = fs.makeQualified(new Path("/")).toString();

    for (String eachPath : uploadedFiles) {
        String pathStr = fs.makeQualified(new Path(eachPath)).toUri().toString();

        // Strip everything up to and including the file system root URI so
        // that only the path within the file system remains.
        int rootPos = pathStr.indexOf(rootUri);
        if (rootPos >= 0) {
            pathStr = pathStr.substring(rootPos + rootUri.length());
        }
        if (!pathStr.startsWith("/")) {
            pathStr = "/" + pathStr;
        }
        LOG.debug("DistributedCache.addArchiveToClassPath: " + pathStr);
        DistributedCache.addArchiveToClassPath(new Path(pathStr), jobConf);
    }
    return jarPath;
}

From source file:org.deeplearning4j.iterativereduce.runtime.Utils.java

License:Apache License

/**
 * Copies a local file to the file system selected by {@code conf}.
 *
 * @param conf   configuration used to obtain the target file system
 * @param local  path of the local source file
 * @param remote destination path; it is qualified against the target file system
 * @throws IOException if the file system cannot be obtained or the copy fails
 */
private static void copyToFs(Configuration conf, String local, String remote) throws IOException {
    final FileSystem targetFs = FileSystem.get(conf);
    final Path source = new Path(local);
    final Path destination = targetFs.makeQualified(new Path(remote));

    LOG.debug("Copying to filesystem, src=" + source + ", dst=" + destination);

    // Per the Hadoop FileSystem API the two flags are delSrc and overwrite:
    // keep the local source, replace an existing destination.
    final boolean deleteSource = false;
    final boolean overwrite = true;
    targetFs.copyFromLocalFile(deleteSource, overwrite, source, destination);
}

From source file:org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java

License:Apache License

/**
 * Launches the Spark streaming job through a {@code SparkLauncher} and blocks
 * until the launched process exits.
 *
 * <p>The application resource is the JAR containing
 * {@code SparkStreamingJobMain}, looked up by name under {@code appRoot/lib}.
 * Every file in that directory and every "hive2" sharelib JAR is added to the
 * launcher. The Hive metastore URI and the Avro class names of the job's
 * schemas are passed as Spark configuration.
 *
 * @param fs      file system used to qualify paths and to list {@code appRoot/lib}
 * @param appRoot root directory of the application
 * @throws AppException if the containing JAR cannot be determined, an I/O
 *         error occurs, the wait is interrupted, or the launched process
 *         returns a non-zero status
 */
@Override
public void start(FileSystem fs, Path appRoot) {
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(SparkStreamingJobMain.class);
    String containingJar = jobConf.getJar();
    if (containingJar == null) {
        // Without this guard new File(null) fails with an uninformative NullPointerException.
        throw new AppException(
                "Could not determine the JAR containing " + SparkStreamingJobMain.class.getName());
    }

    Path libPath = new Path(appRoot, "lib");

    Path jarPath = new Path(libPath, new File(containingJar).getName());
    jarPath = fs.makeQualified(jarPath);

    SparkLauncher launcher = new SparkLauncher();

    launcher.setMainClass(SparkStreamingJobMain.class.getName());

    launcher.setAppResource(jarPath.toString());

    launcher.setMaster("yarn-cluster");

    try {
        // Add the library JARs from HDFS so we don't need to reload
        // them separately into Spark.
        FileStatus[] libJars = fs.listStatus(libPath);

        for (FileStatus jar : libJars) {
            launcher.addJar(jar.getPath().toString());
        }

        // Add the sharelib JARs, since they are not visible to Spark otherwise.
        List<Path> shareLibJars = ShareLibs.jars(sparkJobContext.getHadoopConf(), "hive2");

        for (Path sharelibJar : shareLibJars) {
            launcher.addJar(fs.makeQualified(sharelibJar).toString());
        }

    } catch (IOException e) {
        throw new AppException(e);
    }

    launcher.addAppArgs(appRoot.toString(), description.getJobName());

    // Explicitly set the metastore URI to be usable in the job.
    launcher.setConf("spark.hadoop.hive.metastore.uris",
            sparkJobContext.getHadoopConf().get("hive.metastore.uris"));

    // Add the Avro classes as a comma-separated -D system property.
    List<Schema> schemas = JobReflection.getSchemas(job);
    StringBuilder avroClassesArg = new StringBuilder();

    avroClassesArg.append("-D").append(KryoAvroRegistrator.KITE_AVRO_CLASSES).append("=");

    boolean first = true;

    for (Schema schema : schemas) {
        if (!first) {
            avroClassesArg.append(",");
        }

        avroClassesArg.append(SpecificData.get().getClass(schema).getName());

        first = false;
    }

    // The same option string goes to both the driver and the executors.
    String avroJavaOptions = avroClassesArg.toString();
    launcher.setConf("spark.driver.extraJavaOptions", avroJavaOptions);
    launcher.setConf("spark.executor.extraJavaOptions", avroJavaOptions);

    try {

        Process process = launcher.launch();

        // Redirect the spark-submit output to be visible to the reader.
        Thread stdoutThread = writeOutput(process.getInputStream(), System.out);
        Thread stderrThread = writeOutput(process.getErrorStream(), System.err);

        int result = process.waitFor();

        // Wait for the output threads to finish before reporting the status.
        stdoutThread.join();
        stderrThread.join();

        if (result != 0) {
            throw new AppException("spark-submit returned error status: " + result);
        }

    } catch (IOException e) {
        throw new AppException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still
        // observe that this thread was asked to stop.
        Thread.currentThread().interrupt();
        throw new AppException(e);
    }
}

From source file:org.kitesdk.cli.commands.CSVImportCommand.java

License:Apache License

/**
 * Imports a CSV file into a target dataset.
 *
 * <p>The first target is the CSV path and the second is the dataset name. The
 * CSV file is wrapped in a dataset inside a temporary repository on the same
 * file system as the source, then copied into the target dataset, or
 * transformed when a transform class is configured. The temporary repository
 * is always deleted afterwards.
 *
 * @return 0 if the pipeline succeeded, 1 otherwise
 * @throws IOException if a file system operation fails
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 2,
            "CSV path and target dataset name are required.");

    final Path csvPath = qualifiedPath(targets.get(0));
    final FileSystem csvFs = csvPath.getFileSystem(getConf());
    Preconditions.checkArgument(csvFs.exists(csvPath), "CSV path does not exist: " + csvPath);

    // When a header is given on the command line, the file is treated as
    // having no header row of its own.
    if (header != null) {
        noHeader = true;
    }

    final CSVProperties props = new CSVProperties.Builder()
            .delimiter(delimiter)
            .escape(escape)
            .quote(quote)
            .header(header)
            .hasHeader(!noHeader)
            .linesToSkip(linesToSkip)
            .charset(charsetName)
            .build();

    final String dataset = targets.get(1);

    final View<Record> target = load(dataset, Record.class);
    final Schema datasetSchema = target.getDataset().getDescriptor().getSchema();

    // TODO: replace this with a temporary Dataset from a FS repo
    // TODO: CDK-92: always use GenericRecord?

    // Describe the CSV file with the target's schema, after the embedded
    // partition strategy and column mapping have been removed from it.
    final DatasetDescriptor csvDescriptor = props.addToDescriptor(
            new DatasetDescriptor.Builder().location(csvPath.toUri())
                    .schema(ColumnMappingParser.removeEmbeddedMapping(
                            PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
                    .format("csv").build());

    // Qualify against the source file system so the temporary repository
    // uses the same FS as the CSV file.
    final Path tempRoot = csvFs.makeQualified(new Path("/tmp/" + UUID.randomUUID().toString()));
    final TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(),
            tempRoot, target.getDataset().getNamespace(), UUID.randomUUID().toString());

    try {
        FileSystemDataset<Record> csvDataset = (FileSystemDataset) repo.create("default", "csv", csvDescriptor);

        Preconditions.checkArgument(csvDataset.pathIterator().iterator().hasNext(),
                "CSV path has no data files: " + csvPath);

        TaskUtil.configure(getConf()).addJars(jars);

        TransformTask task;
        if (transform == null) {
            // No transform configured: a plain copy is used.
            task = new CopyTask<Record>(csvDataset, target);
        } else {
            DoFn<Record, Record> transformFn;
            try {
                DynConstructors.Ctor<DoFn<Record, Record>> ctor = new DynConstructors.Builder(DoFn.class)
                        .loader(loaderForJars(jars)).impl(transform).buildChecked();
                transformFn = ctor.newInstance();
            } catch (NoSuchMethodException e) {
                throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e);
            }
            task = new TransformTask<Record, Record>(csvDataset, target, transformFn);
        }

        task.setConf(getConf());

        if (noCompaction) {
            task.noCompaction();
        }

        if (numWriters >= 0) {
            task.setNumWriters(numWriters);
        }

        PipelineResult result = task.run();
        if (!result.succeeded()) {
            return 1;
        }

        // The record count is reported only when something was written.
        long count = task.getCount();
        if (count > 0) {
            console.info("Added {} records to \"{}\"", count, dataset);
        }
        return 0;
    } finally {
        // clean up the temporary repository
        repo.delete();
    }
}