Example usage for org.apache.hadoop.fs FileSystem copyToLocalFile

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem copyToLocalFile.

Prototype

public void copyToLocalFile(Path src, Path dst) throws IOException 

Document

Copy a file from the remote filesystem to the local one.
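
The snippet below is a minimal sketch of the call: it copies a single file from the default (remote) filesystem to the local disk. The paths /user/demo/results/part-r-00000 and /tmp/part-r-00000 are placeholders chosen for illustration, not paths taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalExample {
    public static void main(String[] args) throws IOException {
        // Obtain the filesystem configured as the default (typically HDFS).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder paths: a file on the remote filesystem and a local destination.
        Path src = new Path("/user/demo/results/part-r-00000");
        Path dst = new Path("/tmp/part-r-00000");

        // Copy the remote file to the local filesystem; the source is left untouched.
        fs.copyToLocalFile(src, dst);
    }
}

The two-argument form leaves the source in place; FileSystem also provides copyToLocalFile(boolean delSrc, Path src, Path dst) to delete the source after copying, and copyFromLocalFile for the opposite direction.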

Usage

From source file: pegasus.SSSPResultInfo.java

License: Apache License

public int run(final String[] args) throws Exception {
    if (args.length != 8) {
        return printUsage();
    }

    edge_path = new Path(args[0]);
    csr_path = new Path(args[1]);
    tempbm_path = new Path(args[2]);
    nextbm_path = new Path(args[3]);
    output_path = new Path(args[4]);
    summaryout_path = new Path("concmpt_summaryout");
    number_nodes = Integer.parseInt(args[5]);
    nreducers = Integer.parseInt(args[6]);

    if (args[7].compareTo("makesym") == 0)
        make_symmetric = 1;
    else
        make_symmetric = 0;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");
    System.out.println(
            "[PEGASUS] Computing breadth-first search. Edge path = " + args[0] + ", Reducers = " + nreducers);

    local_output_path = args[4] + "_temp";

    JobClient.runJob(configStage1());
    FileSystem.get(getConf()).rename(csr_path, tempbm_path);

    // Iteratively calculate neighborhood function. 
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        JobClient.runJob(configStage2());
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        final FileSystem fs = FileSystem.get(getConf());

        // copy neighborhood information from HDFS to local disk, and read it!
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));
        SSSPResultInfo ri = readIterationOutput(new_path);

        white_nodes[iter_counter] = ri.white;
        gray_nodes[iter_counter] = ri.gray;
        black_nodes[iter_counter] = ri.black;

        iter_counter++;

        System.out.println(
                "Hop " + i + " : white = " + ri.white + ", gray = " + ri.gray + ", black = " + ri.black);

        // Stop when the minimum neighborhood doesn't change
        if (ri.gray == 0) {
            System.out.println("All vertices have been reached. Finishing...");
            //fs.delete(csr_path);
            fs.delete(tempbm_path);
            fs.delete(output_path);
            fs.rename(nextbm_path, output_path);

            break;
        }

        // rotate directory
        //fs.delete(csr_path);
        fs.delete(tempbm_path);
        fs.delete(output_path);
        fs.rename(nextbm_path, tempbm_path);

    }

    //   FileUtil.fullyDelete( FileSystem.getLocal(getConf()), new Path(local_output_path));

    // calculate summary information using an additional stage
    //System.out.println("Summarizing connected components information...");
    //JobClient.runJob(configStage4());

    // finishing.
    System.out.println("\n[PEGASUS] Breadth-first search computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
    System.out.println(
            "[PEGASUS] BFS distance labels are saved in the HDFS sssp_output as\n\"vertex   vertex   number   distance\" format.\n");

    return 0;
}

From source file: pl.edu.icm.coansys.heeut.TestMapReduce.java

License: Apache License

@Test(timeout = 1800000)
public void testWordCountDiff() throws Exception {

    String prefix = getCurrentDateAppended("wordcount");
    String inputDirName = prefix + "-input";
    String outputDirName = prefix + "-output";

    FileSystem dfs = UTIL.getDFSCluster().getFileSystem();
    Path inputDir = new Path(inputDirName);
    Path qualifiedInputDir = dfs.makeQualified(inputDir);

    String inputFileName = "src/test/resource/input/wordcount/apache_projects.dat";
    dfs.copyFromLocalFile(new Path(inputFileName), qualifiedInputDir);
    ToolRunner.run(UTIL.getConfiguration(), new WordCount(), new String[] { inputDirName, outputDirName });

    Path outputDir = new Path(outputDirName);
    Path qualifiedOutputDir = dfs.makeQualified(outputDir);

    String localOutputDir = "src/test/resource/output/wordcount/" + prefix;
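    // Copy the job output directory from the mini DFS cluster to the local filesystem for comparison.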
    dfs.copyToLocalFile(qualifiedOutputDir, new Path(localOutputDir));

    File outputFile = new File(localOutputDir + "/part-00000");
    File expectedFile = new File("src/test/resource/exp/wordcount/apache_projects.exp");
    boolean isEqual = FileUtils.contentEquals(outputFile, expectedFile);
    Assert.assertTrue(isEqual);
}

From source file: sa.edu.kaust.twitter.preprocess.DetectRetweets.java

License: Apache License

@SuppressWarnings("unused")
public static void runDetectRetweet(String input, String output, String startID, String endID)
        throws Exception {

    /*if (args.length != 3) {
       printUsage();
       return;
    }
            
    String input = args[0];
    String output = args[1];
    int reduceTasks = Integer.parseInt(args[2]);*/

    //Path inputPath = new Path("/shared/tweets2011");
    //Path outputPath = new Path("/user/telsayed/tweets2011");

    Path inputPath = new Path(input);
    Path outputPath = new Path(output);

    sLogger.info("input dir: " + inputPath);
    sLogger.info("output dir: " + outputPath);
    //sLogger.info("num of output files: " + reduceTasks);

    int mapTasks = 100;

    Configuration conf = new Configuration();
    conf.set("startID", startID);
    conf.set("endID", endID);
    FileSystem fs = FileSystem.get(conf);
    Job job = new Job(conf, "DetectRetweets");
    job.setJarByClass(DetectRetweets.class);

    /*if (fs.exists(outputPath)) {
       sLogger.info("Output path already exist: skipping!");
       return;
    }*/

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    //conf.set("mapred.child.java.opts", "-Xmx2048m");

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
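    // Copy the single reducer output (part-r-00000) from HDFS to the local retweet/ directory.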
    fs.copyToLocalFile(new Path(output + "/part-r-00000"), new Path("retweet/part-r-00000"));
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}

From source file: smile.wide.algorithms.SMILEBSjob.java

License: Apache License

/** Sets up the hadoop job and sends it to the cluster
 * waits for the job to be completed.*/
@Override
public int run(String[] params) throws Exception {
    //params: <trainfile> <output_path> <number of seeds>
    Configuration conf = super.getConf();
    conf.set("trainfile", params[0]);
    //distributed cache initialization
    DistributedCache.createSymlink(conf);

    DistributedCache.addFileToClassPath(new Path(libHDFSPath_ + "/smile.jar"), conf);
    DistributedCache.addCacheFile(new URI(libHDFSPath_ + "/libjsmile.so#libjsmile.so"), conf);
    //upload data file to HDFS and add it to the distributed cache
    FileSystem dfs = FileSystem.get(conf);
    dfs.copyFromLocalFile(new Path(params[0]), new Path(dataHDFSPath_));
    DistributedCache.addCacheFile(new URI(dataHDFSPath_ + basename(params[0]) + "#" + basename(params[0])),
            conf);

    //for now, keep the Bayesian search parameters constant
    conf.setInt("iterationCount", iterationCount);
    conf.setFloat("linkProbability", linkProbability);
    conf.setInt("maxParents", maxParents);
    conf.setInt("maxSearchTime", maxSearchTime);
    conf.setFloat("priorLinkProbability", priorLinkProbability);
    conf.setInt("priorSampleSize", priorSampleSize);
    //
    conf.setInt(RandSeedInputFormat.CONFKEY_SEED_COUNT, Integer.parseInt(params[2]));
    conf.setInt(RandSeedInputFormat.CONFKEY_WARMUP_ITER, 100000);
    conf.setLong("mapred.task.timeout", 3600000);

    Job job = new Job(conf);
    job.setJobName("Distributed Bayesian Search");
    job.setJarByClass(SMILEBSjob.class);
    job.setMapperClass(SMILEBSMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StrucLogLikeWritable.class);
    job.setReducerClass(SMILEBSReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RandSeedInputFormat.class);
    Path outputPath = new Path(params[1]);
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);

    //Run the job
    job.waitForCompletion(true);
    //now download result; Path.suffix() returns a new Path rather than modifying outputPath,
    //so its return value is passed directly to copyToLocalFile
    dfs.copyToLocalFile(outputPath.suffix("/part-r-00000"), new Path("./smile-output.txt"));
    return 0;
}