List of usage examples for org.apache.hadoop.fs FileSystem copyToLocalFile
public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException
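copyToLocalFile copies src from this FileSystem to dst on the local file system; when delSrc is true, the source is deleted after a successful copy. Below is a minimal sketch of a typical call; the class name and paths are illustrative placeholders, not taken from the examples that follow.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CopyToLocalExample {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            // Copy an HDFS file into the current local working directory,
            // leaving the HDFS copy in place (delSrc = false).
            fs.copyToLocalFile(false, new Path("/data/input.txt"), new Path("./"));
        }
    }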
From source file: ldbc.snb.datagen.generator.LDBCDatagen.java
License: Open Source License
public int runGenerateJob(Configuration conf) throws Exception {
    String hadoopPrefix = conf.get("ldbc.snb.datagen.serializer.hadoopDir");
    FileSystem fs = FileSystem.get(conf);
    ArrayList<Float> percentages = new ArrayList<Float>();
    percentages.add(0.45f);
    percentages.add(0.45f);
    percentages.add(0.1f);
    //percentages.add(1.0f);
    //percentages.add(0.1f);

    long start = System.currentTimeMillis();
    printProgress("Starting: Person generation");
    long startPerson = System.currentTimeMillis();
    HadoopPersonGenerator personGenerator = new HadoopPersonGenerator(conf);
    personGenerator.run(hadoopPrefix + "/persons", "ldbc.snb.datagen.hadoop.UniversityKeySetter");
    long endPerson = System.currentTimeMillis();

    printProgress("Creating university location correlated edges");
    long startUniversity = System.currentTimeMillis();
    HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.UniversityKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 0,
            conf.get("ldbc.snb.datagen.generator.knowsGenerator"));
    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/universityEdges");
    long endUniversity = System.currentTimeMillis();

    printProgress("Creating main interest correlated edges");
    long startInterest = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.InterestKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 1,
            conf.get("ldbc.snb.datagen.generator.knowsGenerator"));
    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/interestEdges");
    long endInterest = System.currentTimeMillis();

    printProgress("Creating random correlated edges");
    long startRandom = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf,
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            "ldbc.snb.datagen.hadoop.RandomKeySetter",
            percentages, 2,
            "ldbc.snb.datagen.generator.RandomKnowsGenerator");
    knowsGenerator.run(hadoopPrefix + "/persons", hadoopPrefix + "/randomEdges");
    long endRandom = System.currentTimeMillis();

    fs.delete(new Path(DatagenParams.hadoopDir + "/persons"), true);

    printProgress("Merging the different edge files");
    ArrayList<String> edgeFileNames = new ArrayList<String>();
    edgeFileNames.add(hadoopPrefix + "/universityEdges");
    edgeFileNames.add(hadoopPrefix + "/interestEdges");
    edgeFileNames.add(hadoopPrefix + "/randomEdges");
    long startMerge = System.currentTimeMillis();
    HadoopMergeFriendshipFiles merger = new HadoopMergeFriendshipFiles(conf,
            "ldbc.snb.datagen.hadoop.RandomKeySetter");
    merger.run(hadoopPrefix + "/mergedPersons", edgeFileNames);
    long endMerge = System.currentTimeMillis();

    /*printProgress("Creating edges to fill the degree gap");
    long startGap = System.currentTimeMillis();
    knowsGenerator = new HadoopKnowsGenerator(conf, null, "ldbc.snb.datagen.hadoop.DegreeGapKeySetter", 1.0f);
    knowsGenerator.run(personsFileName2, personsFileName1);
    fs.delete(new Path(personsFileName2), true);
    long endGap = System.currentTimeMillis();*/

    printProgress("Serializing persons");
    long startPersonSerializing = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.serializer.persons.sort", false) == false) {
        HadoopPersonSerializer serializer = new HadoopPersonSerializer(conf);
        serializer.run(hadoopPrefix + "/mergedPersons");
    } else {
        HadoopPersonSortAndSerializer serializer = new HadoopPersonSortAndSerializer(conf);
        serializer.run(hadoopPrefix + "/mergedPersons");
    }
    long endPersonSerializing = System.currentTimeMillis();

    long startPersonActivity = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.generator.activity", true)) {
        printProgress("Generating and serializing person activity");
        HadoopPersonActivityGenerator activityGenerator = new HadoopPersonActivityGenerator(conf);
        activityGenerator.run(hadoopPrefix + "/mergedPersons");

        int numThreads = DatagenParams.numThreads;
        int blockSize = DatagenParams.blockSize;
        int numBlocks = (int) Math.ceil(DatagenParams.numPersons / (double) blockSize);
        for (int i = 0; i < numThreads; ++i) {
            if (i < numBlocks) {
                fs.copyToLocalFile(false,
                        new Path(DatagenParams.hadoopDir + "/m" + i + "personFactors.txt"),
                        new Path("./"));
                fs.copyToLocalFile(false,
                        new Path(DatagenParams.hadoopDir + "/m" + i + "activityFactors.txt"),
                        new Path("./"));
                fs.copyToLocalFile(false,
                        new Path(DatagenParams.hadoopDir + "/m0friendList" + i + ".csv"),
                        new Path("./"));
            }
        }
    }
    long endPersonActivity = System.currentTimeMillis();

    long startSortingUpdateStreams = System.currentTimeMillis();
    if (conf.getBoolean("ldbc.snb.datagen.serializer.updateStreams", false)) {
        printProgress("Sorting update streams ");
        int blockSize = DatagenParams.blockSize;
        int numBlocks = (int) Math.ceil(DatagenParams.numPersons / (double) blockSize);
        List<String> personStreamsFileNames = new ArrayList<String>();
        List<String> forumStreamsFileNames = new ArrayList<String>();
        for (int i = 0; i < DatagenParams.numThreads; ++i) {
            int numPartitions = conf.getInt("ldbc.snb.datagen.serializer.numUpdatePartitions", 1);
            //if( i < numBlocks ) {
            for (int j = 0; j < numPartitions; ++j) {
                personStreamsFileNames.add(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j);
                if (conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                    forumStreamsFileNames.add(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j);
                }
            }
            /*} else {
                for (int j = 0; j < numPartitions; ++j) {
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                }
            }*/
        }
        HadoopUpdateStreamSorterAndSerializer updateSorterAndSerializer =
                new HadoopUpdateStreamSorterAndSerializer(conf);
        updateSorterAndSerializer.run(personStreamsFileNames, "person");
        updateSorterAndSerializer.run(forumStreamsFileNames, "forum");
        for (String file : personStreamsFileNames) {
            fs.delete(new Path(file), true);
        }
        for (String file : forumStreamsFileNames) {
            fs.delete(new Path(file), true);
        }

        /*for( int i = 0; i < DatagenParams.numThreads; ++i) {
            int numPartitions = conf.getInt("ldbc.snb.datagen.serializer.numUpdatePartitions", 1);
            if( i < numBlocks ) {
                for (int j = 0; j < numPartitions; ++j) {
                    HadoopFileSorter updateStreamSorter = new HadoopFileSorter(conf, LongWritable.class, Text.class);
                    HadoopUpdateStreamSerializer updateSerializer = new HadoopUpdateStreamSerializer(conf);
                    updateStreamSorter.run(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j,
                            DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                    updateSerializer.run(DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j, i, j, "person");
                    fs.delete(new Path(DatagenParams.hadoopDir + "/updateStream_person_" + i + "_" + j), true);
                    if( conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                        updateStreamSorter.run(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j,
                                DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j);
                        fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                        updateSerializer.run(DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j, i, j, "forum");
                        fs.delete(new Path(DatagenParams.hadoopDir + "/updateStream_forum_" + i + "_" + j), true);
                    }
                }
            } else {
                for (int j = 0; j < numPartitions; ++j) {
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + "_" + j), true);
                    fs.delete(new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + "_" + j), true);
                }
            }
        }*/

        long minDate = Long.MAX_VALUE;
        long maxDate = Long.MIN_VALUE;
        long count = 0;
        for (int i = 0; i < DatagenParams.numThreads; ++i) {
            Path propertiesFile = new Path(DatagenParams.hadoopDir + "/temp_updateStream_person_" + i + ".properties");
            FSDataInputStream file = fs.open(propertiesFile);
            Properties properties = new Properties();
            properties.load(file);
            long aux;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
            minDate = aux < minDate ? aux : minDate;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
            maxDate = aux > maxDate ? aux : maxDate;
            aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
            count += aux;
            file.close();
            fs.delete(propertiesFile, true);

            if (conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
                propertiesFile = new Path(DatagenParams.hadoopDir + "/temp_updateStream_forum_" + i + ".properties");
                file = fs.open(propertiesFile);
                properties = new Properties();
                properties.load(file);
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                minDate = aux < minDate ? aux : minDate;
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                maxDate = aux > maxDate ? aux : maxDate;
                aux = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                count += aux;
                file.close();
                fs.delete(propertiesFile, true);
            }
        }

        OutputStream output = fs.create(new Path(DatagenParams.socialNetworkDir + "/updateStream" + ".properties"), true);
        output.write(new String("ldbc.snb.interactive.gct_delta_duration:" + DatagenParams.deltaTime + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.min_write_event_start_time:" + minDate + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.max_write_event_start_time:" + maxDate + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.update_interleave:" + (maxDate - minDate) / count + "\n").getBytes());
        output.write(new String("ldbc.snb.interactive.num_events:" + count).getBytes());
        output.close();
    }
    long endSortingUpdateStreams = System.currentTimeMillis();

    printProgress("Serializing invariant schema ");
    long startInvariantSerializing = System.currentTimeMillis();
    HadoopInvariantSerializer invariantSerializer = new HadoopInvariantSerializer(conf);
    invariantSerializer.run();
    long endInvariantSerializing = System.currentTimeMillis();

    long end = System.currentTimeMillis();
    System.out.println(((end - start) / 1000) + " total seconds");
    System.out.println("Person generation time: " + ((endPerson - startPerson) / 1000));
    System.out.println("University correlated edge generation time: " + ((endUniversity - startUniversity) / 1000));
    System.out.println("Interest correlated edge generation time: " + ((endInterest - startInterest) / 1000));
    System.out.println("Random correlated edge generation time: " + ((endRandom - startRandom) / 1000));
    System.out.println("Edges merge time: " + ((endMerge - startMerge) / 1000));
    System.out.println("Person serialization time: " + ((endPersonSerializing - startPersonSerializing) / 1000));
    System.out.println("Person activity generation and serialization time: " + ((endPersonActivity - startPersonActivity) / 1000));
    System.out.println("Sorting update streams time: " + ((endSortingUpdateStreams - startSortingUpdateStreams) / 1000));
    System.out.println("Invariant schema serialization time: " + ((endInvariantSerializing - startInvariantSerializing) / 1000));
    System.out.println("Total Execution time: " + ((end - start) / 1000));

    if (conf.getBoolean("ldbc.snb.datagen.parametergenerator.parameters", false)
            && conf.getBoolean("ldbc.snb.datagen.generator.activity", false)) {
        System.out.println("Running Parameter Generation");
        System.out.println("Generating Interactive Parameters");
        ProcessBuilder pb = new ProcessBuilder("mkdir", "-p",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        Process p = pb.start();
        p.waitFor();

        pb = new ProcessBuilder(conf.get("ldbc.snb.datagen.parametergenerator.python"),
                "paramgenerator/generateparams.py", "./",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        File logInteractive = new File("parameters_interactive.log");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logInteractive));
        p = pb.start();
        p.waitFor();

        System.out.println("Generating BI Parameters");
        pb = new ProcessBuilder(conf.get("ldbc.snb.datagen.parametergenerator.python"),
                "paramgenerator/generateparamsbi.py", "./",
                conf.get("ldbc.snb.datagen.serializer.outputDir") + "/substitution_parameters");
        pb.directory(new File("./"));
        File logBi = new File("parameters_bi.log");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logBi));
        p = pb.start();
        p.waitFor();
        System.out.println("Finished Parameter Generation");
    }
    return 0;
}
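In this example every copyToLocalFile call passes false for delSrc, so the per-thread factor files and friend lists are materialized in the driver's current directory ("./") while the originals are left in the job's HDFS working directory.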
From source file: org.apache.falcon.regression.TestngListener.java
License: Apache License
private void dumpFalconStore(ITestResult result) throws IOException {
    if (Config.getBoolean("merlin.dump.staging", false)) {
        final String[] serverNames = Config.getStringArray("servers");
        for (final String serverName : serverNames) {
            final ColoHelper coloHelper = new ColoHelper(serverName.trim());
            final FileSystem clusterFs = coloHelper.getClusterHelper().getHadoopFS();
            final String fileNameTemp = StringUtils.join(new String[] {
                    serverName, result.getName(),
                    Arrays.toString(result.getParameters()),
                    TimeUtil.dateToOozieDate(new Date()),
            }, "-");
            final String localFileName = fileNameTemp.replaceAll(":", "-");
            LOGGER.info("Dumping staging contents to: " + fileNameTemp);
            clusterFs.copyToLocalFile(false, new Path(MerlinConstants.STAGING_LOCATION),
                    new Path(localFileName));
        }
    }
}
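Here delSrc is likewise false: the staging directory is copied to a local file whose name encodes the server, test name, parameters, and timestamp, so the cluster-side contents can be inspected after the test run without being removed.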
From source file: org.apache.ignite.internal.processors.hadoop.GridHadoopPopularWordsTest.java
License: Apache License
/**
 * Publish job execution results into local file system, so you can view them.
 *
 * @param fs Distributed file system used in job.
 * @throws IOException If failed.
 */
private void publishResults(FileSystem fs) throws IOException {
    X.println(">>> Cleaning up DFS input directory: " + BOOKS_DFS_DIR);
    fs.delete(BOOKS_DFS_DIR, true);

    X.println(">>> Cleaning up LOCAL result directory: " + RESULT_LOCAL_DIR);
    fs.delete(RESULT_LOCAL_DIR, true);

    X.println(">>> Moving job results into LOCAL result directory: " + RESULT_LOCAL_DIR);
    fs.copyToLocalFile(true, RESULT_DFS_DIR, RESULT_LOCAL_DIR);
}
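This is the one example that passes true for delSrc: copyToLocalFile deletes RESULT_DFS_DIR from the distributed file system after the copy succeeds, which is why the log message describes the operation as moving rather than copying the results.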
From source file: org.apache.mrql.Plan.java
License: Apache License
/** retrieve the compiled functional argument of code */
final static Function functional_argument(Configuration conf, Tree code) {
    Node n = (Node) code;
    if (n.name().equals("compiled"))
        try {
            // if the client has not received the jar file with the compiled arguments, copy the file from HDFS
            if (Compiler.jar_path == null) {
                Path hdfs_path = new Path(conf.get("mrql.jar.path"));
                String local_path = Compiler.tmp_dir + "/mrql_args_" + random_generator.nextInt(1000000) + ".jar";
                FileSystem fs = hdfs_path.getFileSystem(conf);
                fs.copyToLocalFile(false, hdfs_path, new Path("file://" + local_path));
                Compiler.jar_path = local_path;
            }
            return Compiler.compiled(conf.getClassLoader(), n.children().nth(0).toString());
        } catch (Exception ex) {
            System.err.println("*** Warning: Unable to retrieve the compiled lambda: " + code);
            return ((Lambda) Interpreter.evalE(n.children().nth(1))).lambda();
        }
    else if (code.equals(Interpreter.identity_mapper))
        return new Function() {
            public MRData eval(final MRData x) {
                return new Bag(x);
            }
        };
    else
        return ((Lambda) Interpreter.evalE(code)).lambda();
}
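The destination here carries an explicit file:// scheme to force a local path, and delSrc is false, so the jar also remains in HDFS for any other client that still needs to download it.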