List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
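Before the collected examples, a minimal, self-contained sketch of what Path.toString() returns; the namenode URI and file names below are illustrative assumptions, not taken from any of the source files listed on this page.

import org.apache.hadoop.fs.Path;

public class PathToStringExample {
    public static void main(String[] args) {
        // A child Path built against a parent directory (both names are hypothetical)
        Path parent = new Path("hdfs://namenode:8020/user/hadoop");
        Path child = new Path(parent, "input0.seq");

        // toString() renders the full path, including scheme and authority when present
        System.out.println(parent.toString()); // hdfs://namenode:8020/user/hadoop
        System.out.println(child.toString());  // hdfs://namenode:8020/user/hadoop/input0.seq
    }
}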
From source file:PerformanceEvaluation.java
License:Apache License
private void runTest(final Class<? extends Test> cmd)
        throws IOException, InterruptedException, ClassNotFoundException {
    MiniHBaseCluster hbaseMiniCluster = null;
    MiniDFSCluster dfsCluster = null;
    MiniZooKeeperCluster zooKeeperCluster = null;
    if (this.miniCluster) {
        dfsCluster = new MiniDFSCluster(conf, 2, true, (String[]) null);
        zooKeeperCluster = new MiniZooKeeperCluster();
        int zooKeeperPort = zooKeeperCluster.startup(new File(System.getProperty("java.io.tmpdir")));

        // mangle the conf so that the fs parameter points to the minidfs we
        // just started up
        FileSystem fs = dfsCluster.getFileSystem();
        conf.set("fs.default.name", fs.getUri().toString());
        conf.set("hbase.zookeeper.property.clientPort", Integer.toString(zooKeeperPort));
        Path parentdir = fs.getHomeDirectory();
        conf.set(HConstants.HBASE_DIR, parentdir.toString());
        fs.mkdirs(parentdir);
        FSUtils.setVersion(fs, parentdir);
        hbaseMiniCluster = new MiniHBaseCluster(this.conf, N);
    }

    try {
        if (N == 1) {
            // If there is only one client and one HRegionServer, we assume nothing
            // has been set up at all.
            runNIsOne(cmd);
        } else {
            // Else, run
            runNIsMoreThanOne(cmd);
        }
    } finally {
        if (this.miniCluster) {
            if (hbaseMiniCluster != null)
                hbaseMiniCluster.shutdown();
            if (zooKeeperCluster != null)
                zooKeeperCluster.shutdown();
            HBaseTestCase.shutdownDfs(dfsCluster);
        }
    }
}
From source file:WikipediaDocnoMappingBuilder.java
License:Apache License
@Override
public int build(Path src, Path dest, Configuration conf) throws IOException {
    super.setConf(conf);
    try {
        return run(new String[] { "-" + INPUT_OPTION + "=" + src.toString(),
                "-" + OUTPUT_FILE_OPTION + "=" + dest.toString() });
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:alluxio.hadoop.mapreduce.KeyValueInputFormat.java
License:Apache License
/**
 * Returns a list of {@link KeyValueInputSplit} where each split is one key-value partition.
 *
 * @param jobContext MapReduce job configuration
 * @return list of {@link InputSplit}s, each split is a partition
 * @throws IOException if information about the partition cannot be retrieved
 */
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
    // The paths are MapReduce program's inputs specified in
    // {@code mapreduce.input.fileinputformat.inputdir}, each path should be a key-value store.
    Path[] paths = FileInputFormat.getInputPaths(jobContext);
    List<InputSplit> splits = new ArrayList<>();
    try {
        for (Path path : paths) {
            List<PartitionInfo> partitionInfos = mKeyValueMasterClient
                    .getPartitionInfo(new AlluxioURI(path.toString()));
            for (PartitionInfo partitionInfo : partitionInfos) {
                splits.add(new KeyValueInputSplit(partitionInfo));
            }
        }
    } catch (AlluxioException e) {
        throw new IOException(e);
    }
    return splits;
}
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
        throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class,
                NullWritable.class, CompressionType.NONE);
    }

    // Create example file writer
    SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile,
            IntWritable.class, NullWritable.class, CompressionType.NONE);

    // Write random values to input files and example
    IntWritable inputKey = new IntWritable();
    NullWritable nullValue = NullWritable.get();
    Random r = new Random();
    for (long i = 0; i < n; i++) {
        inputKey.set(r.nextInt(n));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(inputKey, nullValue);
        }
        inputKey.set(r.nextInt(n));
        exampleWriter.append(inputKey, nullValue);
    }

    // Close file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
    exampleWriter.close();
}
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 1) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            return;
        }
    } else {
        // BSPJobClient jobClient = new BSPJobClient(conf);
        // ClusterStatus cluster = jobClient.getClusterStatus(true);
        // job.setNumBspTask(cluster.getMaxTasks());
        conf.setInt("bsp.peers.num", 2); // 1 CPU and 1 GPU
    }
    // Enable one GPU task
    conf.setInt("bsp.peers.gpu.num", 1);
    conf.setBoolean("hama.pipes.logging", true);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumBspGpuTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    Path example = new Path(CONF_INPUT_DIR.getParent(), "example.seq");
    conf.set(CONF_EXAMPLE_PATH, example.toString());
    LOG.info("exampleFile: " + example.toString());

    prepareInput(conf, CONF_INPUT_DIR, example, CONF_N);

    BSPJob job = createHelloHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        // Print input files
        // printOutput(job, CONF_INPUT_DIR);
        // printOutput(job, example);

        // Print output
        printOutput(job, FileOutputFormat.getOutputPath(job));
    }
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBenchmark.java
License:Apache License
@Override
protected void setUp() throws Exception {
    m_conf = new Configuration();

    // Try to load Hadoop configuration
    String HADOOP_HOME = System.getenv("HADOOP_HOME");
    String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL");
    if ((HADOOP_HOME != null) || (HADOOP_INSTALL != null) && (!m_runLocally)) {
        String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL);

        m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml"));
        m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml"));
        m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml"));
        m_conf.addResource(new Path(HADOOP, "conf/core-site.xml"));
        m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml"));
        m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml"));
        // System.out.println("Loaded Hadoop configuration from " + HADOOP);

        try {
            // Connect to HDFS Filesystem
            FileSystem.get(m_conf);
        } catch (Exception e) {
            // HDFS not reachable run Benchmark locally
            m_conf = new Configuration();
            m_runLocally = true;
        }
        // System.out.println("Run Benchmark local: " + m_runLocally);
    }

    // Try to load Hama configuration
    String HAMA_HOME = System.getenv("HAMA_HOME");
    String HAMA_INSTALL = System.getenv("HAMA_INSTALL");
    if ((HAMA_HOME != null) || (HAMA_INSTALL != null) && (!m_runLocally)) {
        String HAMA = ((HAMA_HOME != null) ? HAMA_HOME : HAMA_INSTALL);

        m_conf.addResource(new Path(HAMA, "conf/hama-default.xml"));
        m_conf.addResource(new Path(HAMA, "conf/hama-site.xml"));
        // System.out.println("Loaded Hama configuration from " + HAMA);
    }

    // Setup KMeans config variables
    m_conf.setBoolean(KMeansHybridBSP.CONF_DEBUG, false);
    m_conf.setBoolean("hama.pipes.logging", false);
    m_conf.setBoolean(KMeansHybridBSP.CONF_TIME, false);

    // Set GPU blockSize and gridSize
    m_conf.set(KMeansHybridBSP.CONF_BLOCKSIZE, "" + BLOCK_SIZE);
    m_conf.set(KMeansHybridBSP.CONF_GRIDSIZE, "" + GRID_SIZE);
    // Set maxIterations for KMeans
    m_conf.setInt(KMeansHybridBSP.CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    m_conf.setLong(KMeansHybridBSP.CONF_N, n);
    // Set GPUPercentage
    m_conf.setInt(KMeansHybridBSP.CONF_GPU_PERCENTAGE, GPUWorkload);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    m_conf.set(KMeansHybridBSP.CONF_CENTER_IN_PATH, centerIn.toString());
    m_conf.set(KMeansHybridBSP.CONF_CENTER_OUT_PATH, centerOut.toString());

    // CPU vs GPU benchmark
    // Plot 1 and 2
    int numGpuBspTask = 0;
    // if (type == CalcType.GPU) {
    //     bspTaskNum = 1;
    //     numGpuBspTask = 1;
    //     GPUWorkload = 100;
    // }

    // CPU + GPU Hybrid benchmark
    // Plot 3
    if (bspTaskNum == maxTaskNum) {
        numGpuBspTask = 1;
        GPUWorkload = 75;
    } else {
        numGpuBspTask = 0;
    }

    // Set CPU tasks
    m_conf.setInt("bsp.peers.num", bspTaskNum);
    // Set GPU tasks
    m_conf.setInt("bsp.peers.gpu.num", numGpuBspTask);

    // Generate input data
    KMeansHybridBSP.prepareInputData(m_conf, FileSystem.get(m_conf), CONF_INPUT_DIR, centerIn,
            bspTaskNum, numGpuBspTask, n, k, vectorDimension, null, GPUWorkload);

    // Debug output
    // System.out.println("CalcType: " + type);
    System.out.println("CONF_TMP_DIR: " + CONF_TMP_DIR.toString());
    System.out.println("NumBspTask: " + m_conf.getInt("bsp.peers.num", 0) + " NumGpuBspTask: "
            + m_conf.getInt("bsp.peers.gpu.num", 0));
    System.out.println("n: " + n + " k: " + k + " vectorDimension: " + vectorDimension
            + " maxIteration: " + maxIteration + " GPUWorkload: " + GPUWorkload + "%");
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=n | Number of input vectors (" + n + ")");
            System.out.println("    Argument6=k | Number of start vectors (" + k + ")");
            System.out.println("    Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
            System.out.println("    Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println("    Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println("    Argument10=GPUPercentage (percentage of input)");
            System.out.println("    Argument11=isDebugging (true|false=default)");
            System.out.println("    Argument12=timeMeasurement (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // prepare Input
    if (useTestExampleInput) {
        // prepareTestInput(conf, fs, input, centerIn);
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
                vectorDimension, null, GPUPercentage);
    } else {
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
                vectorDimension, new Random(3337L), GPUPercentage);
    }

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }

        if (k < 50) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
        }
    }
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
static void printFile(Configuration conf, FileSystem fs, Path file, Writable key, Writable value)
        throws IOException {
    System.out.println("File " + file.toString());
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, file, conf);

        while (reader.next(key, value)) {
            System.out.println("key: '" + key.toString() + "' value: '" + value.toString() + "'\n");
        }
    } catch (IOException e) {
        FSDataInputStream in = fs.open(file);
        IOUtils.copyBytes(in, System.out, conf, false);
        in.close();
    } catch (NullPointerException e) {
        LOG.error(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java
License:Apache License
public static BSPJob createMatrixMultiplicationHybridBSPConf(Configuration conf, Path aPath,
        Path bPath, Path outPath) throws IOException {

    BSPJob job = new BSPJob(new HamaConfiguration(conf));
    // Set the job name
    job.setJobName("MatrixMultiplicationHybridBSP");
    // set the BSP class which shall be executed
    job.setBspClass(MatrixMultiplicationHybridBSP.class);
    // help Hama to locate the jar to be distributed
    job.setJarByClass(MatrixMultiplicationHybridBSP.class);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setInputKeyClass(IntWritable.class);
    job.setInputValueClass(PipesVectorWritable.class);
    job.setInputPath(aPath);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PipesVectorWritable.class);
    job.setOutputPath(outPath);

    job.setMessageClass(MatrixRowMessage.class);

    job.set(CONF_MATRIX_MULT_B_PATH, bPath.toString());
    job.set("bsp.child.java.opts", "-Xmx4G");

    // Order message by row index
    job.set(MessageManager.RECEIVE_QUEUE_TYPE_CLASS,
            "org.apache.hama.bsp.message.queue.SortedMemoryQueue");

    LOG.info("DEBUG: NumBspTask: " + job.getNumBspTask()); // "bsp.peers.num"
    LOG.info("DEBUG: bsp.job.split.file: " + job.get("bsp.job.split.file"));
    LOG.info("DEBUG: bsp.tasks.maximum: " + job.get("bsp.tasks.maximum"));
    LOG.info("DEBUG: bsp.input.dir: " + job.get("bsp.input.dir"));
    LOG.info("DEBUG: bsp.join.expr: " + job.get("bsp.join.expr"));

    return job;
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBSP.java
License:Apache License
public static BSPJob createMatrixMultiplicationHybridBSPConf(Configuration conf, Path matrixAPath,
        Path transposedMatrixBPath, Path matrixCPath, int tileWidth, boolean isDebugging)
        throws IOException {

    BSPJob job = new BSPJob(new HamaConfiguration(conf), MatrixMultiplicationHybridBSP.class);
    // Set the job name
    job.setJobName("MatrixMultiplicationHybridBSP");
    // set the BSP class which shall be executed
    job.setBspClass(MatrixMultiplicationHybridBSP.class);
    // help Hama to locate the jar to be distributed
    job.setJarByClass(MatrixMultiplicationHybridBSP.class);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setInputKeyClass(IntWritable.class);
    job.setInputValueClass(VectorWritable.class);
    job.setInputPath(matrixAPath);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputPath(matrixCPath);

    job.setMessageClass(MatrixRowMessage.class);

    job.set("bsp.child.java.opts", "-Xms1G -Xmx1G");

    // Order message by row index
    job.set(MessageManager.RECEIVE_QUEUE_TYPE_CLASS,
            "org.apache.hama.bsp.message.queue.SortedMemoryQueue");

    job.set(CONF_MATRIX_B_PATH, transposedMatrixBPath.toString());
    job.set(CONF_TILE_WIDTH, "" + tileWidth);
    job.setBoolean(CONF_DEBUG, isDebugging);

    return job;
}