Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#copyFromLocalFile, drawn from open-source projects.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk.
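
A minimal sketch of a direct call, assuming fs.defaultFS points at the target cluster; the source and destination paths below are hypothetical:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws IOException {
        // Obtain the FileSystem configured via fs.defaultFS.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Copy a file from the local disk into the target file system;
        // the local source is kept intact after the copy.
        Path src = new Path("/tmp/local-input.txt"); // hypothetical local path
        Path dst = new Path("/user/example/input.txt"); // hypothetical destination
        fs.copyFromLocalFile(src, dst);
    }
}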

Usage

From source file: org.apache.rya.accumulo.mr.merge.CopyTool.java

License: Apache License

/**
 * Copies the file from the local file system into the HDFS.
 * @param localInputPath the local system input {@link Path}.
 * @param hdfsOutputPath the HDFS output {@link Path}.
 * @param configuration the {@link Configuration} to use.
 * @throws IOException
 */
public static void copyLocalToHdfs(final Path localInputPath, final Path hdfsOutputPath,
        final Configuration configuration) throws IOException {
    final FileSystem fs = FileSystem.get(configuration);
    fs.copyFromLocalFile(localInputPath, hdfsOutputPath);
}
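
A short usage sketch of the helper above, assuming the Configuration resolves to the target HDFS; both paths are hypothetical:

Configuration conf = new Configuration();
CopyTool.copyLocalToHdfs(new Path("/tmp/statements.rdf"),
        new Path("/user/rya/statements.rdf"), conf);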

From source file: org.apache.solr.hadoop.hack.MiniMRClientClusterFactory.java

License: Apache License

public static MiniMRClientCluster create(Class<?> caller, String identifier, int noOfNMs, Configuration conf,
        File testWorkDir) throws IOException {

    if (conf == null) {
        conf = new Configuration();
    }

    FileSystem fs = FileSystem.get(conf);

    Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir").makeQualified(fs);
    Path appJar = new Path(testRootDir, "MRAppJar.jar");

    // Copy MRAppJar and make it private.
    Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR);

    fs.copyFromLocalFile(appMasterJar, appJar);
    fs.setPermission(appJar, new FsPermission("744"));

    Job job = Job.getInstance(conf);

    job.addFileToClassPath(appJar);

    Path callerJar = new Path(JarFinder.getJar(caller));
    Path remoteCallerJar = new Path(testRootDir, callerJar.getName());
    fs.copyFromLocalFile(callerJar, remoteCallerJar);
    fs.setPermission(remoteCallerJar, new FsPermission("744"));
    job.addFileToClassPath(remoteCallerJar);

    MiniMRYarnCluster miniMRYarnCluster;
    try {
        miniMRYarnCluster = new MiniMRYarnCluster(identifier, noOfNMs, testWorkDir);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    job.getConfiguration().set("minimrclientcluster.caller.name", identifier);
    job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number", noOfNMs);
    miniMRYarnCluster.init(job.getConfiguration());
    miniMRYarnCluster.start();

    return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir);
}

From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License: Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this makes it possible to run in a debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because 
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}

From source file: org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java

License: Apache License

private Path upAvroFile(FileSystem fs, Path inDir, String DATADIR, Path dataDir, String localFile)
        throws IOException, UnsupportedEncodingException {
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + File.separator + localFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, localFile), dataDir);
    return INPATH;
}

From source file: org.apache.solr.hadoop.SolrOutputFormat.java

License: Apache License

public static void addSolrConfToDistributedCache(Job job, File solrHomeZip) throws IOException {
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    Configuration jobConf = job.getConfiguration();
    jobConf.set(ZIP_NAME, hdfsZipName);

    Path zipPath = new Path("/tmp", getZipName(jobConf));
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + getZipName(jobConf));

    DistributedCache.addCacheArchive(baseZipUrl, jobConf);
    LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives()));
    LOG.debug("Set zipPath: {}", zipPath);
    // Actually send the path for the configuration zip file
    jobConf.set(SETUP_OK, zipPath.toString());
}
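
Note the '#' fragment appended to the archive URI: for distributed cache entries, Hadoop uses the URI fragment as the symlink name in the task working directory, so tasks can locate the zip under the stable name returned by getZipName(jobConf) even though the actual HDFS file name carries a random UUID prefix.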

From source file: org.apache.spark.tez.utils.HadoopUtils.java

License: Apache License

/**
 * Copies a local resource to the given file system, unless the destination
 * already exists there.
 * @param fs the target {@link FileSystem}.
 * @param sourcePath the local source {@link Path}.
 * @param destPath the destination {@link Path} on {@code fs}.
 */
private static synchronized void provisioinResourceToFs(FileSystem fs, Path sourcePath, Path destPath) {
    try {
        if (logger.isDebugEnabled()) {
            logger.debug("Provisioning '" + sourcePath + "' to " + destPath);
        }
        if (!fs.exists(destPath)) {
            fs.copyFromLocalFile(sourcePath, destPath);
        } else {
            logger.debug("Skipping provisioning of " + destPath + " since it already exists.");
        }
    } catch (IOException e) {
        logger.warn("Failed to copy local resource " + sourcePath + " to " + destPath, e);
    }
}
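
The exists() guard makes provisioning idempotent across submissions, at the cost of never refreshing a resource that has changed locally; a caller that needs the latest copy would have to delete the destination first. Note also that an IOException is only logged as a warning, so a failed copy surfaces later as a missing resource rather than as an error at provisioning time.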

From source file: org.apache.tajo.LocalTajoTestingUtility.java

License: Apache License

public void setup(String[] names, String[] tablepaths, Schema[] schemas, KeyValueSet option) throws Exception {
    LOG.info("===================================================");
    LOG.info("Starting Test Cluster.");
    LOG.info("===================================================");

    util = new TajoTestingCluster();
    util.startMiniCluster(1);
    conf = util.getConfiguration();
    client = util.newTajoClient();

    FileSystem fs = util.getDefaultFileSystem();
    Path rootDir = TajoConf.getWarehouseDir(conf);
    fs.mkdirs(rootDir);
    for (int i = 0; i < tablepaths.length; i++) {
        Path localPath = new Path(tablepaths[i]);
        Path tablePath = new Path(rootDir, names[i]);
        fs.mkdirs(tablePath);
        Path dfsPath = new Path(tablePath, localPath.getName());
        fs.copyFromLocalFile(localPath, dfsPath);
        TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV, option);

        // Add fake table statistic data to tables.
        // It gives more various situations to unit tests.
        TableStats stats = new TableStats();
        stats.setNumBytes(TPCH.tableVolumes.get(names[i]));
        TableDesc tableDesc = new TableDesc(
                CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, names[i]), schemas[i], meta,
                tablePath.toUri());
        tableDesc.setStats(stats);
        util.getMaster().getCatalog().createTable(tableDesc);
    }

    LOG.info("===================================================");
    LOG.info("Test Cluster ready and test table created.");
    LOG.info("===================================================");

}

From source file: org.apache.tajo.yarn.command.LaunchCommand.java

License: Apache License

/**
 * Copies a local file into the file system and registers it as an
 * application-visible {@link LocalResource}.
 * @return the destination {@link Path} on the file system.
 */
private Path addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId,
        Map<String, LocalResource> localResources, LocalResourceType type) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileSrcPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()), type,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
    return dst;
}
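
This is the standard YARN pattern for shipping a file to containers: copy it to a file system visible to the cluster, then describe the uploaded copy with a LocalResource carrying its exact size and modification time, which the NodeManager verifies before localizing the file into the container's working directory.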

From source file: org.apache.tez.benchmark.SessionTest.java

License: Apache License

protected Map<String, LocalResource> getLocalResources(TezConfiguration tezConf)
        throws IOException, URISyntaxException {
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    Path stagingDir = TezCommonUtils.getTezBaseStagingPath(tezConf);

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    Path jobJar = new Path(stagingDir, "job.jar");
    if (fs.exists(jobJar)) {
        fs.delete(jobJar, true);
    }
    fs.copyFromLocalFile(getCurrentJarURL(), jobJar);

    localResources.put("job.jar", createLocalResource(fs, jobJar));
    return localResources;
}

From source file: org.apache.tez.mapreduce.examples.FilterLinesByWord.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();

    boolean generateSplitsInClient = false;

    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }

    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }

    TezConfiguration tezConf = new TezConfiguration(conf);

    fs.getWorkingDirectory();
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException(
                "Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
            remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources,
            credentials);
    tezSession.start(); // Why do I need to start the TezSession.

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor
            .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
            .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Don't use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false)
                .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
            ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
            1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
            .newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf)
            .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING
                    || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                    || dagStatus.getState() == DAGStatus.State.FAILED
                    || dagStatus.getState() == DAGStatus.State.KILLED
                    || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // continue;
            }
        }

        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.fatal("Failed to get application progress. Exiting");
                return -1;
            }
        }

        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }

    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}