Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

This page collects example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk. It is copied to the given dst name on this filesystem, and the local source is left intact.
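
As a quick illustration before the full examples, here is a minimal, self-contained sketch (not taken from the sources below): the paths and class name are hypothetical, and it assumes fs.defaultFS points at a reachable Hadoop filesystem.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws Exception {
        // Assumes fs.defaultFS points at a reachable HDFS (or other Hadoop filesystem).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/tmp/data.txt"); // hypothetical local file
        Path dst = new Path("/user/example/data.txt"); // hypothetical destination on fs

        // Copy the local file to the target filesystem; the local source is left intact.
        fs.copyFromLocalFile(src, dst);
    }
}

A related overload, copyFromLocalFile(boolean delSrc, Path src, Path dst), deletes the local source after the copy when delSrc is true.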

Usage

From source file:org.apache.tez.mapreduce.examples.FilterLinesByWordOneToOne.java

License:Apache License
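
This example stages the jar containing the job classes into a per-run staging directory on HDFS with copyFromLocalFile, registers it as a YARN LocalResource, and submits a two-stage Tez DAG whose vertices are connected by a one-to-one edge.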

@Override
public int run(String[] otherArgs) throws Exception {
    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWordOneToOne.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing"
                + FilterLinesByWordOneToOne.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, null);
    tezSession.start(); // The session must be started before DAGs can be submitted to it.

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);

    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor
            .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false)
                .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            dsd.getNumberOfShards());
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    stage2Vertex.addDataSink("MROutput",
            DataSinkDescriptor.create(
                    OutputDescriptor.create(MROutput.class.getName())
                            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
                    OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultOneToOneEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // Ignore the interruption and poll again.
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Ignore the interruption and poll again.
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}

From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java

License:Apache License
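
Here copyFromLocalFile pushes the jar containing MRRSleepJob to the remote staging directory so it can be localized, via a shared LocalResource map, for the map, intermediate-reduce, and final-reduce vertices of the generated DAG.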

public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
        int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
        boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
        mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateMRConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
        for (int i = 1; i <= iReduceStagesCount; ++i) {
            JobConf iReduceStageConf = new JobConf(conf);
            iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
            iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
            iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
            iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
            iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

            MRHelpers.translateMRConfToTez(iReduceStageConf);
            intermediateReduceStageConfs[i - 1] = iReduceStageConf;
        }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
        finalReduceConf = new JobConf(conf);
        finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
        finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
        finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
        finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

        MRHelpers.translateMRConfToTez(finalReduceConf);
    }

    MRHelpers.configureMRApiUsage(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
        }
    }
    if (numReducer > 0) {
        MRHelpers.configureMRApiUsage(finalReduceConf);
    }

    DataSourceDescriptor dataSource = null;
    if (!generateSplitsInAM && writeSplitsToDFS) {

        LOG.info("Writing splits to DFS");
        dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir,
                true);
    } else {
        dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
                .generateSplitsInAM(generateSplitsInAM).build();
    }

    DAG dag = DAG.create("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Map<String, String> mapEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
    Map<String, String> reduceEnv = Maps.newHashMap();
    MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

    Vertex mapVertex = Vertex.create("map",
            ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks,
            MRHelpers.getResourceForMRMapper(mapStageConf));
    mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
            .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
        for (int i = 0; i < iReduceStagesCount; ++i) {
            Configuration iconf = intermediateReduceStageConfs[i];
            UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
            Vertex ivertex = Vertex.create("ireduce" + (i + 1),
                    ProcessorDescriptor.create(ReduceProcessor.class.getName())
                            .setUserPayload(iReduceUserPayload),
                    numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
            ivertex.addTaskLocalFiles(commonLocalResources)
                    .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
                    .setTaskEnvironment(reduceEnv);
            vertices.add(ivertex);
        }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
        UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
        finalReduceVertex = Vertex.create("reduce",
                ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
                numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
        finalReduceVertex.addTaskLocalFiles(commonLocalResources)
                .addDataSink("MROutput",
                        MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
                .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
                .setTaskEnvironment(reduceEnv);
        vertices.add(finalReduceVertex);
    } else {
        // Map only job
        mapVertex.addDataSink("MROutput",
                MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
    }

    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName(), partitionerConf)
            .configureInput().useLegacyInput().done().build();

    for (int i = 0; i < vertices.size(); ++i) {
        dag.addVertex(vertices.get(i));
        if (i != 0) {
            dag.addEdge(
                    Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
        }
    }

    return dag;
}

From source file:org.apache.tez.test.MiniTezCluster.java

License:Apache License
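
During mini-cluster initialization, the Tez application jar is copied from the local disk to the test filesystem with copyFromLocalFile, made world-readable, and advertised through tez.lib.uris.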

@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // Use libs from cluster since no build is available
    conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true);
    // blacklisting disabled to prevent scheduling issues
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
        conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath());
    }

    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
        // Nothing defined; set a quick delete value.
        conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    File appJarLocalFile = new File(MiniTezCluster.APPJAR);

    if (!appJarLocalFile.exists()) {
        String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting.";
        LOG.info(message);
        throw new TezUncheckedException(message);
    } else {
        LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath());
    }

    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar");
    // Copy AppJar and make it public.
    Path appMasterJar = new Path(MiniTezCluster.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appRemoteJar);
    fs.setPermission(appRemoteJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // VMEM monitoring disabled, PMEM monitoring enabled.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingPath = FileContext.getFileContext(conf)
                .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * Re-configure the staging path on Windows if the file system is localFs.
         * We need to use an absolute path that contains the drive letter: the unit
         * test could run on a different drive than the AM, in which case job files
         * are localized to the drive the test runs on while the AM starts on a
         * different drive and fails to find the job metafiles. Using an absolute
         * path avoids this ambiguity.
         */
        if (Path.WINDOWS) {
            if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
                conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                        new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
            }
        }
        FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
        if (fc.util().exists(stagingPath)) {
            LOG.info(stagingPath + " exists! deleting...");
            fc.delete(stagingPath, true);
        }
        LOG.info("mkdir: " + stagingPath);
        fc.mkdir(stagingPath, null, true);

        //mkdir done directory as well
        String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        Path doneDirPath = fc.makeQualified(new Path(doneDir));
        fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
        throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    //configure the shuffle service in NM
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);

    // Non-standard shuffle port: 0 lets the shuffle handler bind to any free port.
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
            ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
}

From source file:org.apache.tez.tests.MiniTezClusterWithTimeline.java

License:Apache License
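
The timeline-enabled mini-cluster variant performs the same staging: the Tez AppJar is copied with copyFromLocalFile, made world-readable, and published via tez.lib.uris.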

@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // Use libs from cluster since no build is available
    conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true);
    // blacklisting disabled to prevent scheduling issues
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
        conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath());
    }

    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
        // Nothing defined; set a quick delete value.
        conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    File appJarLocalFile = new File(MiniTezClusterWithTimeline.APPJAR);

    if (!appJarLocalFile.exists()) {
        String message = "TezAppJar " + MiniTezClusterWithTimeline.APPJAR + " not found. Exiting.";
        LOG.info(message);
        throw new TezUncheckedException(message);
    } else {
        LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath());
    }

    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar");
    // Copy AppJar and make it public.
    Path appMasterJar = new Path(MiniTezClusterWithTimeline.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appRemoteJar);
    fs.setPermission(appRemoteJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // VMEM monitoring disabled, PMEM monitoring enabled.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingPath = FileContext.getFileContext(conf)
                .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * Re-configure the staging path on Windows if the file system is localFs.
         * We need to use an absolute path that contains the drive letter: the unit
         * test could run on a different drive than the AM, in which case job files
         * are localized to the drive the test runs on while the AM starts on a
         * different drive and fails to find the job metafiles. Using an absolute
         * path avoids this ambiguity.
         */
        if (Path.WINDOWS) {
            if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
                conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                        new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
            }
        }
        FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
        if (fc.util().exists(stagingPath)) {
            LOG.info(stagingPath + " exists! deleting...");
            fc.delete(stagingPath, true);
        }
        LOG.info("mkdir: " + stagingPath);
        fc.mkdir(stagingPath, null, true);

        //mkdir done directory as well
        String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        Path doneDirPath = fc.makeQualified(new Path(doneDir));
        fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
        throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    //configure the shuffle service in NM
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
            new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(
            String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
            ShuffleHandler.class, Service.class);

    // Non-standard shuffle port: 0 lets the shuffle handler bind to any free port.
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
            ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
}

From source file:org.apache.tez.Tez_1494.java

License:Apache License
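
Builds the LocalResource map for a Tez job: any stale job.jar in the base staging directory is deleted first, then the jar containing the current class is uploaded with copyFromLocalFile.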

private Map<String, LocalResource> getLocalResources(TezConfiguration tezConf)
        throws IOException, URISyntaxException {
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    Path stagingDir = TezCommonUtils.getTezBaseStagingPath(tezConf);

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    Path jobJar = new Path(stagingDir, "job.jar");
    if (fs.exists(jobJar)) {
        fs.delete(jobJar, false);
    }
    fs.copyFromLocalFile(getCurrentJarURL(), jobJar);

    localResources.put("job.jar", createLocalResource(fs, jobJar));
    return localResources;
}

From source file:org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer.java

License:Apache License
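
If the jar is not already present under a versioned hadoop-gremlin libs directory in the user's home directory on HDFS, it is copied there with copyFromLocalFile and added to the DistributedCache classpath.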

@Override
protected void loadJar(final org.apache.hadoop.conf.Configuration hadoopConfiguration, final File file,
        final Object... params) throws IOException {
    final FileSystem defaultFileSystem = FileSystem.get(hadoopConfiguration);
    try {
        final Path jarFile = new Path(defaultFileSystem.getHomeDirectory() + "/hadoop-gremlin-"
                + Gremlin.version() + "-libs/" + file.getName());
        if (!defaultFileSystem.exists(jarFile)) {
            final Path sourcePath = new Path(file.getPath());
            final URI sourceUri = sourcePath.toUri();
            final FileSystem fs = FileSystem.get(sourceUri, hadoopConfiguration);
            fs.copyFromLocalFile(sourcePath, jarFile);
        }
        try {
            DistributedCache.addArchiveToClassPath(jarFile, this.giraphConfiguration, defaultFileSystem);
        } catch (final Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}

From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphComputer.java

License:Apache License
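
Every jar found under the directories listed in HADOOP_GREMLIN_LIBS is copied to a libs directory under the user's home directory on HDFS and registered on the DistributedCache classpath.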

private void loadJars(final FileSystem fs) {
    final String hadoopGremlinLibsRemote = "hadoop-gremlin-libs";
    if (this.giraphConfiguration.getBoolean(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, true)) {
        final String hadoopGremlinLocalLibs = System.getenv(Constants.HADOOP_GREMLIN_LIBS);
        if (null == hadoopGremlinLocalLibs)
            LOGGER.warn(Constants.HADOOP_GREMLIN_LIBS + " is not set -- proceeding regardless");
        else {
            final String[] paths = hadoopGremlinLocalLibs.split(":");
            for (final String path : paths) {
                final File file = new File(path);
                if (file.exists()) {
                    Stream.of(file.listFiles()).filter(f -> f.getName().endsWith(Constants.DOT_JAR))
                            .forEach(f -> {
                                try {
                                    final Path jarFile = new Path(fs.getHomeDirectory() + "/"
                                            + hadoopGremlinLibsRemote + "/" + f.getName());
                                    fs.copyFromLocalFile(new Path(f.getPath()), jarFile);
                                    try {
                                        DistributedCache.addArchiveToClassPath(jarFile,
                                                this.giraphConfiguration, fs);
                                    } catch (final Exception e) {
                                        throw new RuntimeException(e.getMessage(), e);
                                    }
                                } catch (Exception e) {
                                    throw new IllegalStateException(e.getMessage(), e);
                                }
                            });
                } else {
                    LOGGER.warn(path + " does not reference a valid directory -- proceeding regardless");
                }
            }
        }
    }
}

From source file:org.apache.vxquery.xtest.MiniDFS.java

License:Apache License
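
Starts a MiniDFSCluster for tests and seeds it with test data: a local resource directory is located in the source tree and copied to /tmp/vxquery-hdfs-test on the new filesystem.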

public void startHDFS() throws IOException {

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    JobConf conf = new JobConf();
    String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
    Path hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
    if (!lfs.exists(hdfs_conf)) {
        PATH_TO_HADOOP_CONF = "vxquery-xtest/src/test/resources/hadoop/conf";
        hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        if (!lfs.exists(hdfs_conf)) {
            PATH_TO_HADOOP_CONF = "../vxquery-xtest/src/test/resources/hadoop/conf";
            hdfs_conf = new Path(PATH_TO_HADOOP_CONF);
        }
    }
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    int numDataNodes = 1;
    int nameNodePort = 40000;

    // cleanup artifacts created on the local file system
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    MiniDFSCluster.Builder build = new MiniDFSCluster.Builder(conf);
    build.nameNodePort(nameNodePort);
    build.nameNodeHttpPort(nameNodePort + 34);
    build.numDataNodes(numDataNodes);
    build.checkExitOnShutdown(true);
    build.startupOption(StartupOption.REGULAR);
    build.format(true);
    build.waitSafeMode(true);
    dfsCluster = build.build();

    FileSystem dfs = FileSystem.get(conf);
    String DATA_PATH = "src/test/resources/TestSources/ghcnd";
    Path src = new Path(DATA_PATH);
    if (!lfs.exists(src)) {
        DATA_PATH = "vxquery-xtest/src/test/resources/TestSources/ghcnd";
        src = new Path(DATA_PATH);
        if (!lfs.exists(src)) {
            DATA_PATH = "../vxquery-xtest/src/test/resources/TestSources/ghcnd";
            src = new Path(DATA_PATH);
        }
    }
    dfs.mkdirs(new Path("/tmp"));
    Path dest = new Path("/tmp/vxquery-hdfs-test");
    dfs.copyFromLocalFile(src, dest);
    if (dfs.exists(dest)) {
        System.err.println("Test files copied to HDFS successfully");
    }
}

From source file:org.commoncrawl.service.crawler.CrawlLog.java

License:Open Source License
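
Checkpoints a local crawl-segment log by copying it to a staging directory on HDFS, but only when the file holds more than the header.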

private Path transferLocalSegmentLog(FileSystem hdfs, File localSegmentLogFile, long checkpointId, int listId,
        int segmentId) throws IOException {

    if (localSegmentLogFile.exists()) {

        // Transfer the log only if it is larger than the header, i.e. it holds data.
        if (localSegmentLogFile.length() > CrawlSegmentLog.getHeaderSize()) {
            // Construct the target path where the checkpointed crawl log will be stored.
            Path remoteLogFileName = new Path(CrawlEnvironment.getCheckpointStagingDirectory(),
                    CrawlEnvironment.buildCrawlSegmentLogCheckpointFileName(getNodeName(), checkpointId) + "_"
                            + Integer.toString(listId) + "_" + Integer.toString(segmentId));

            hdfs.copyFromLocalFile(new Path(localSegmentLogFile.getAbsolutePath()), remoteLogFileName);

            return remoteLogFileName;
        }
    }
    return null;
}

From source file:org.deeplearning4j.hadoop.util.HdfsUtils.java

License:Apache License
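
Uploads every jar on the client classpath to HDFS with copyFromLocalFile, records the uploaded paths in the job configuration, and rolls back already-copied files if any upload fails; an HdfsLock lets concurrent jobs reuse a previously uploaded classpath.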

/**
 * Adapted from
 * http://terrier.org/docs/v3.5/javadoc/org/terrier/utility/io/HadoopUtility.html#saveClassPathToJob%28org.apache.hadoop.mapred.JobConf%29
 * @param jobConf
 * @throws IOException
 */
public static List<Path> saveClassPathToJob(JobConf jobConf) throws Exception {
    String hdfs = getHost(jobConf);

    HdfsLock lock = new HdfsLock(hdfs);
    String hdfs2 = getHdfs(jobConf);
    if (jobConf.get(HDFS_HOST) != null) {
        if (lock.isLocked()) {
            List<Path> ret = lock.getPaths();
            StringBuffer files = new StringBuffer();
            StringBuffer classPath = new StringBuffer();
            for (Path path : ret) {
                files.append(hdfs2 + path.toString());
                files.append(",");
                classPath.append(hdfs2 + path.toString());
                classPath.append(":");
                jobConf.addResource(path.toUri().toURL());
            }
            String classPathToSet = classPath.toString().substring(0, classPath.lastIndexOf(":"));
            String filesToSet = files.toString().substring(0, files.lastIndexOf(","));
            log.info("Setting class path " + classPathToSet);
            log.info("Using files " + filesToSet);
            jobConf.set("mapred.cache.files", filesToSet);
            jobConf.set("mapred.job.classpath.files", classPathToSet);
            return ret;
        }
    }
    List<Path> paths = new ArrayList<Path>();
    log.info("Copying classpath to job");

    final String[] jars = findJarFiles(new String[] { System.getenv().get("CLASSPATH"),
            System.getProperty("java.class.path"), System.getProperty("surefire.test.class.path") });

    final FileSystem defFS = FileSystem.get(jobConf);
    int numFilesWritten = 0;
    for (String jarFile : jars) {
        //class path issues
        if (jarFile.contains("hadoop-client")) {
            log.info("Skipping hadoop-client");
            continue;
        } else if (jarFile.contains("mapreduce-run")) {
            log.info("Skipping map reduce run");
            continue;
        }

        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeFile(jobConf, filename);
        log.info("Uploading " + jarFile + " to " + tmpJarFilePath.toString());
        try {
            defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
            jobConf.addResource(tmpJarFilePath);
            paths.add(tmpJarFilePath);
            numFilesWritten++;
        } catch (Exception e) {
            for (Path path : paths) {
                if (defFS.exists(path))
                    defFS.delete(path, true);
            }

            lock.close();
            log.error(String.format("Exception writing to hdfs; rolling back %d jar files ", numFilesWritten),
                    e);
            throw new IOException("Couldn't write jar file " + jarFile);
        }
    }
    try {
        lock.create(paths);
    } catch (KeeperException.SessionExpiredException e) {
        lock = new HdfsLock(hdfs);
        lock.create(paths);

    }

    lock.close();
    // Remove from the returned list any paths that no longer exist on HDFS.

    Set<Path> remove = new HashSet<Path>();
    for (Path path : paths) {
        boolean exists = false;
        try {
            exists = defFS.exists(path);
        } catch (IllegalArgumentException e) {
            exists = false;
        }
        if (!exists)
            remove.add(path);
    }
    paths.removeAll(remove);
    return paths;
}