Example usage for org.apache.hadoop.fs FileSystem copyToLocalFile

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem copyToLocalFile.

Prototype

public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException 

Document

Copy a file from a remote filesystem to the local one; if delSrc is true, the remote source is deleted once the copy completes.

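Minimal example

Before the project snippets below, a minimal, self-contained sketch of the call. The class name and both paths are placeholders chosen for illustration (they do not come from any of the projects listed); with delSrc set to false the remote copy is left in place after the download.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves to the filesystem configured as fs.defaultFS (typically HDFS).
        FileSystem fs = FileSystem.get(conf);

        // Placeholder paths: point the source at a file that actually exists in your cluster.
        Path remoteSrc = new Path("/tmp/example/input.txt");
        Path localDst = new Path("./input.txt");

        // delSrc = false: do not delete the remote source after copying it to the local disk.
        fs.copyToLocalFile(false, remoteSrc, localDst);

        fs.close();
    }
}
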
Usage

From source file: ldbc.snb.datagen.generator.LDBCDatagen.java

License: Open Source License

public int runGenerateJob(Configuration conf) throws Exception {

    String hadoopPrefix = conf.get("ldbc.snb.datagen.serializer.hadoopDir");
    FileSystem fs = FileSystem.get(conf);
    ArrayList<Float> percentages = new ArrayList<Float>();
    percentages.add(0.45f);
    percentages.add(0.45f);
    percentages.add(0.1f);

    //percentages.add(1.0f);
    //percentages.add(0.1f);

    long start = System.currentTimeMillis();
    printProgress("Starting: Person generation");
    long startPerson = System.currentTimeMillis();
    HadoopPersonGenerator personGenerator = new HadoopPersonGenerator(conf);
    personGenerator.run(hadoopPrefix + "/persons", "ldbc.snb.datagen.hadoop.UniversityKeySetter");
    long endPerson = System.currentTimeMillis();

    printProgress("Creating university location correlated edges");
    long startUniversity = System.currentTimeMillis();
    HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.UniversityKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 0, conf.get("ldbc.snb.datagen.generator.knowsGenerator"));

    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/universityEdges");
    long endUniversity = System.currentTimeMillis();

    printProgress("Creating main interest correlated edges");
    long startInterest = System.currentTimeMillis();

    knowsGenerator = new HadoopKnowsGenerator(conf, "ldbc.snb.datagen.hadoop.InterestKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 1,
            conf.get("ldbc.snb.datagen.generator.knowsGenerator"));

    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/interestEdges");
    long endInterest = System.currentTimeMillis();

    printProgress("Creating random correlated edges");
    long startRandom = System.currentTimeMillis();

    knowsGenerator = new HadoopKnowsGenerator(conf, "ldbc.snb.datagen.hadoop.RandomKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 2,
            "ldbc.snb.datagen.generator.RandomKnowsGenerator");

    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/randomEdges");
    long endRandom = System.currentTimeMillis();

    fs.delete(new Path(DatagenParams.hadoopDir + "/persons"), true);
    printProgress("Merging the different edge files");
    ArrayList<String> edgeFileNames = new ArrayList<String>();
    edgeFileNames.add(hadoopPrefix + "/universityEdges");
    edgeFileNames.add(hadoopPrefix + "/interestEdges");
    edgeFileNames.add(hadoopPrefix + "/randomEdges");
    long startMerge = System.currentTimeMillis();
    HadoopMergeFriendshipFiles merger = new HadoopMergeFriendshipFiles(conf,
            "ldbc.snb.datagen.hadoop.RandomKeySetter");
    merger.run(hadoopPrefix + "/mergedPersons", edgeFileNames);
    long endMerge = System.currentTimeMillis();
    /*printProgress("Creating edges to fill the degree gap");
    long startGap = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf,null, "ldbc.snb.datagen.hadoop.DegreeGapKeySetter", 1.0f);
    knowsGenerator.run(personsFileName2,personsFileName1);
    fs.delete(new Path(personsFileName2), true);
    long endGap = System.currentTimeMillis();
    */

    printProgress("Serializing persons");
    long startPersonSerializing = System.currentTimeMillis();
    if (!conf.getBoolean("ldbc.snb.datagen.serializer.persons.sort", false)) {
        HadoopPersonSerializer serializer = new HadoopPersonSerializer(conf);
        serializer.run(hadoopPrefix + "/mergedPersons");
    } else {
        HadoopPersonSortAndSerializer serializer = new HadoopPersonSortAndSerializer(conf);
        serializer.run(hadoopPrefix + "/mergedPersons");
    }
    long endPersonSerializing = System.currentTimeMillis();

    long startPersonActivity = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.generator.activity", true)) {
        printProgress("Generating and serializing person activity");
        HadoopPersonActivityGenerator activityGenerator = new HadoopPersonActivityGenerator(conf);
        activityGenerator.run(hadoopPrefix + "/mergedPersons");

        int numThreads = DatagenParams.numThreads;
        int blockSize = DatagenParams.blockSize;
        int numBlocks = (int) Math.ceil(DatagenParams.numPersons / (double) blockSize);

        for (int i = 0; i < numThreads; ++i) {
            if (i < numBlocks) {
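                // Pull the per-block factor files and friend lists from HDFS into the local
                // working directory; delSrc = false leaves the HDFS copies in place.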
                fs.copyToLocalFile(false, new Path(DatagenParams.hadoopDir + "/m" + i + "personFactors.txt"),
                        new Path("./"));
                fs.copyToLocalFile(false, new Path(DatagenParams.hadoopDir + "/m" + i + "activityFactors.txt"),
                        new Path("./"));
                fs.copyToLocalFile(false, new Path(DatagenParams.hadoopDir + "/m0friendList" + i + ".csv"),
                        new Path("./"));
            }
        }
    }
    long endPersonActivity = System.currentTimeMillis();

    long startSortingUpdateStreams = System.currentTimeMillis();

    if (conf.getBoolean("ldbc.snb.datagen.serializer.updateStreams", false)) {

        printProgress("Sorting update streams ");

        int blockSize = DatagenParams.blockSize;
        int numBlocks = (int) Math.ceil(DatagenParams.numPersons / (double) blockSize);

        List<String> personStreamsFileNames = new ArrayList<String>();
        List<String> forumStreamsFileNames = new ArrayList<String>();
        for (int i = 0; i < DatagenParams.numThreads; ++i) {
            int numPartitions = conf.getInt("ldbc.snb.datagen.serializer.numUpdatePartitions", 1);
            //if( i < numBlocks ) {
            for (int j = 0; j < numPartitions; ++j) {
                personStreamsFileNames
                        .add(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j);
                if (conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                    forumStreamsFileNames
                            .add(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j);
                }
            }
            /*} else {
            for (int j = 0; j < numPartitions; ++j) {
                fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
            }
            } */
        }
        HadoopUpdateStreamSorterAndSerializer updateSorterAndSerializer = new HadoopUpdateStreamSorterAndSerializer(
                conf);
        updateSorterAndSerializer.run(personStreamsFileNames, "person");
        updateSorterAndSerializer.run(forumStreamsFileNames, "forum");
        for (String file : personStreamsFileNames) {
            fs.delete(new Path(file), true);
        }

        for (String file : forumStreamsFileNames) {
            fs.delete(new Path(file), true);
        }

        /*for( int i = 0; i < DatagenParams.numThreads; ++i) {
        int numPartitions = conf.getInt("ldbc.snb.datagen.serializer.numUpdatePartitions", 1);
        if( i < numBlocks ) {
            for (int j = 0; j < numPartitions; ++j) {
                HadoopFileSorter updateStreamSorter = new HadoopFileSorter(conf, LongWritable.class, Text.class);
                HadoopUpdateStreamSerializer updateSerializer = new HadoopUpdateStreamSerializer(conf);
                updateStreamSorter.run(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j, DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j);
                fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                updateSerializer.run(DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j, i, j, "person");
                fs.delete(new Path(DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j), true);
                if( conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                    updateStreamSorter.run(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j, DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                    updateSerializer.run(DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j, i, j, "forum");
                    fs.delete(new Path(DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j), true);
                }
            }
        } else {
            for (int j = 0; j < numPartitions; ++j) {
                fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
            }
        }
        }*/

        long minDate = Long.MAX_VALUE;
        long maxDate = Long.MIN_VALUE;
        long count = 0;
        for (int i = 0; i < DatagenParams.numThreads; ++i) {
            Path propertiesFile = new Path(
                    DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + ".properties");
            FSDataInputStream file = fs.open(propertiesFile);
            Properties properties = new Properties();
            properties.load(file);
            long aux;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
            minDate = aux < minDate ? aux : minDate;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
            maxDate = aux > maxDate ? aux : maxDate;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
            count += aux;
            file.close();
            fs.delete(propertiesFile, true);

            if (conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                propertiesFile = new Path(
                        DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + ".properties");
                file = fs.open(propertiesFile);
                properties = new Properties();
                properties.load(file);
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                minDate = aux < minDate ? aux : minDate;
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                maxDate = aux > maxDate ? aux : maxDate;
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                count += aux;
                file.close();
                fs.delete(propertiesFile, true);
            }
        }

        OutputStream output = fs
                .create(new Path(DatagenParams.socialNetworkDir + "/updateStream" + ".properties"), true);
        output.write(("ldbc.snb.interactive.gct_delta_duration:" + DatagenParams.deltaTime + "\n").getBytes());
        output.write(("ldbc.snb.interactive.min_write_event_start_time:" + minDate + "\n").getBytes());
        output.write(("ldbc.snb.interactive.max_write_event_start_time:" + maxDate + "\n").getBytes());
        output.write(("ldbc.snb.interactive.update_interleave:" + (maxDate - minDate) / count + "\n").getBytes());
        output.write(("ldbc.snb.interactive.num_events:" + count).getBytes());
        output.close();
    }

    long endSortingUpdateStreams = System.currentTimeMillis();

    printProgress("Serializing invariant schema ");
    long startInvariantSerializing = System.currentTimeMillis();
    HadoopInvariantSerializer invariantSerializer = new HadoopInvariantSerializer(conf);
    invariantSerializer.run();
    long endInvariantSerializing = System.currentTimeMillis();

    long end = System.currentTimeMillis();

    System.out.println(((end - start) / 1000) + " total seconds");
    System.out.println("Person generation time: " + ((endPerson - startPerson) / 1000));
    System.out.println(
            "University correlated edge generation time: " + ((endUniversity - startUniversity) / 1000));
    System.out.println("Interest correlated edge generation time: " + ((endInterest - startInterest) / 1000));
    System.out.println("Random correlated edge generation time: " + ((endRandom - startRandom) / 1000));
    System.out.println("Edges merge time: " + ((endMerge - startMerge) / 1000));
    System.out
            .println("Person serialization time: " + ((endPersonSerializing - startPersonSerializing) / 1000));
    System.out.println("Person activity generation and serialization time: "
            + ((endPersonActivity - startPersonActivity) / 1000));
    System.out.println(
            "Sorting update streams time: " + ((endSortingUpdateStreams - startSortingUpdateStreams) / 1000));
    System.out.println("Invariant schema serialization time: "
            + ((endInvariantSerializing - startInvariantSerializing) / 1000));
    System.out.println("Total Execution time: " + ((end - start) / 1000));

    if (conf.getBoolean("ldbc.snb.datagen.parametergenerator.parameters", false)
            && conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
        System.out.println("Running Parameter Generation");
        System.out.println("Generating Interactive Parameters");
        ProcessBuilder pb = new ProcessBuilder("mkdir", "-p",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        Process p = pb.start();
        p.waitFor();

        pb = new ProcessBuilder(conf.get("ldbc.snb.datagen.parametergenerator.python"),
                "paramgenerator/generateparams.py", "./",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        File logInteractive = new File("parameters_interactive.log");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logInteractive));
        p = pb.start();
        p.waitFor();

        System.out.println("Generating BI Parameters");
        pb = new ProcessBuilder(conf.get("ldbc.snb.datagen.parametergenerator.python"),
                "paramgenerator/generateparamsbi.py", "./",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        File logBi = new File("parameters_bi.log");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logBi));
        p = pb.start();
        p.waitFor();
        System.out.println("Finished Parameter Generation");
    }
    return 0;
}

From source file: org.apache.falcon.regression.TestngListener.java

License: Apache License

private void dumpFalconStore(ITestResult result) throws IOException {
    if (Config.getBoolean("merlin.dump.staging", false)) {
        final String[] serverNames = Config.getStringArray("servers");
        for (final String serverName : serverNames) {
            final ColoHelper coloHelper = new ColoHelper(serverName.trim());
            final FileSystem clusterFs = coloHelper.getClusterHelper().getHadoopFS();
            final String fileNameTemp = StringUtils.join(new String[] { serverName, result.getName(),
                    Arrays.toString(result.getParameters()), TimeUtil.dateToOozieDate(new Date()), }, "-");
            final String localFileName = fileNameTemp.replaceAll(":", "-");
            LOGGER.info("Dumping staging contents to: " + fileNameTemp);
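            // Copy the whole staging directory from the cluster into a local directory named
            // after the server, test and timestamp; delSrc = false keeps the staging files on the cluster.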
            clusterFs.copyToLocalFile(false, new Path(MerlinConstants.STAGING_LOCATION),
                    new Path(localFileName));
        }
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.GridHadoopPopularWordsTest.java

License: Apache License

/**
 * Publish job execution results into the local file system so you can view them.
 *
 * @param fs Distributed file system used in the job.
 * @throws IOException If failed.
 */
private void publishResults(FileSystem fs) throws IOException {
    X.println(">>> Cleaning up DFS input directory: " + BOOKS_DFS_DIR);

    fs.delete(BOOKS_DFS_DIR, true);

    X.println(">>> Cleaning up LOCAL result directory: " + RESULT_LOCAL_DIR);

    fs.delete(RESULT_LOCAL_DIR, true);

    X.println(">>> Moving job results into LOCAL result directory: " + RESULT_LOCAL_DIR);

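    // delSrc = true: the DFS result directory is removed once it has been copied locally.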
    fs.copyToLocalFile(true, RESULT_DFS_DIR, RESULT_LOCAL_DIR);
}

From source file: org.apache.mrql.Plan.java

License: Apache License

/** retrieve the compiled functional argument of code */
final static Function functional_argument(Configuration conf, Tree code) {
    Node n = (Node) code;
    if (n.name().equals("compiled"))
        try {
            // if the client has not received the jar file with the compiled arguments, copy the file from HDFS
            if (Compiler.jar_path == null) {
                Path hdfs_path = new Path(conf.get("mrql.jar.path"));
                String local_path = Compiler.tmp_dir + "/mrql_args_" + random_generator.nextInt(1000000)
                        + ".jar";
                FileSystem fs = hdfs_path.getFileSystem(conf);
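                // delSrc = false: keep the jar on HDFS and just place a local copy in the temp directory.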
                fs.copyToLocalFile(false, hdfs_path, new Path("file://" + local_path));
                Compiler.jar_path = local_path;
            }
            return Compiler.compiled(conf.getClassLoader(), n.children().nth(0).toString());
        } catch (Exception ex) {
            System.err.println("*** Warning: Unable to retrieve the compiled lambda: " + code);
            return ((Lambda) Interpreter.evalE(n.children().nth(1))).lambda();
        }
    else if (code.equals(Interpreter.identity_mapper))
        return new Function() {
            public MRData eval(final MRData x) {
                return new Bag(x);
            }
        };
    else
        return ((Lambda) Interpreter.evalE(code)).lambda();
}