List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(Path parent, Path child)
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
@Override public void bspGpu(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer, Rootbeer rootbeer) throws IOException, SyncException, InterruptedException { BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration()); FileSystem fs = FileSystem.get(peer.getConfiguration()); FSDataOutputStream outStream = fs//from ww w.j ava 2s . c om .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log")); outStream.writeChars("HelloHybrid.bspGpu executed on GPU!\n"); HelloHybridKernel kernel = new HelloHybridKernel(peer.getConfiguration().get(CONF_EXAMPLE_PATH), CONF_N, "boo:and:foo", ":"); // Run GPU Kernels Context context = rootbeer.createDefaultContext(); Stopwatch watch = new Stopwatch(); watch.start(); // 1 Kernel within 1 Block rootbeer.run(kernel, new ThreadConfig(1, 1, 1), context); watch.stop(); List<StatsRow> stats = context.getStats(); for (StatsRow row : stats) { outStream.writeChars(" StatsRow:\n"); outStream.writeChars(" serial time: " + row.getSerializationTime() + "\n"); outStream.writeChars(" exec time: " + row.getExecutionTime() + "\n"); outStream.writeChars(" deserial time: " + row.getDeserializationTime() + "\n"); outStream.writeChars(" num blocks: " + row.getNumBlocks() + "\n"); outStream.writeChars(" num threads: " + row.getNumThreads() + "\n"); } outStream.writeChars("HelloHybridKernel,GPUTime=" + watch.elapsedTimeMillis() + "ms\n"); outStream.writeChars("HelloHybridKernel,peerName: '" + kernel.peerName + "'\n"); outStream.writeChars("HelloHybridKernel,numPeers: '" + kernel.numPeers + "'\n"); outStream.writeChars("HelloHybridKernel,summation: '" + Arrays.toString(kernel.summation) + "'\n"); outStream.writeChars("HelloHybridKernel,getAllPeerNames: '" + Arrays.toString(kernel.allPeerNames) + "'\n"); // test String.split outStream.writeChars("HelloHybridKernel,splitString: '" + kernel.splitString + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\") len: " + 
kernel.splits1.length + " values: '" + Arrays.toString(kernel.splits1) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",2) len: " + kernel.splits2.length + " values: '" + Arrays.toString(kernel.splits2) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",5) len: " + kernel.splits3.length + " values: '" + Arrays.toString(kernel.splits3) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\"" + kernel.delimiter + "\",-2) len: " + kernel.splits4.length + " values: '" + Arrays.toString(kernel.splits4) + "'\n"); outStream.writeChars("HelloHybridKernel,split(\";\") len: " + kernel.splits5.length + " values: '" + Arrays.toString(kernel.splits5) + "'\n"); outStream.close(); }
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n) throws IOException { FileSystem fs = inputPath.getFileSystem(conf); // Create input file writers depending on bspTaskNum int bspTaskNum = conf.getInt("bsp.peers.num", 1); SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum]; for (int i = 0; i < bspTaskNum; i++) { Path inputFile = new Path(inputPath, "input" + i + ".seq"); LOG.info("inputFile: " + inputFile.toString()); inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, NullWritable.class, CompressionType.NONE);/* ww w .j a v a 2 s .co m*/ } // Create example file writer SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile, IntWritable.class, NullWritable.class, CompressionType.NONE); // Write random values to input files and example IntWritable inputKey = new IntWritable(); NullWritable nullValue = NullWritable.get(); Random r = new Random(); for (long i = 0; i < n; i++) { inputKey.set(r.nextInt(n)); for (int j = 0; j < inputWriters.length; j++) { inputWriters[j].append(inputKey, nullValue); } inputKey.set(r.nextInt(n)); exampleWriter.append(inputKey, nullValue); } // Close file writers for (int j = 0; j < inputWriters.length; j++) { inputWriters[j].close(); } exampleWriter.close(); }
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException { Configuration conf = new HamaConfiguration(); if (args.length > 0) { if (args.length == 1) { conf.setInt("bsp.peers.num", Integer.parseInt(args[0])); } else {/*from ww w. j a v a2 s .c o m*/ System.out.println("Wrong argument size!"); System.out.println(" Argument1=numBspTask"); return; } } else { // BSPJobClient jobClient = new BSPJobClient(conf); // ClusterStatus cluster = jobClient.getClusterStatus(true); // job.setNumBspTask(cluster.getMaxTasks()); conf.setInt("bsp.peers.num", 2); // 1 CPU and 1 GPU } // Enable one GPU task conf.setInt("bsp.peers.gpu.num", 1); conf.setBoolean("hama.pipes.logging", true); LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0)); LOG.info("NumBspGpuTask: " + conf.getInt("bsp.peers.gpu.num", 0)); LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum")); LOG.info("inputPath: " + CONF_INPUT_DIR); LOG.info("outputPath: " + CONF_OUTPUT_DIR); Path example = new Path(CONF_INPUT_DIR.getParent(), "example.seq"); conf.set(CONF_EXAMPLE_PATH, example.toString()); LOG.info("exampleFile: " + example.toString()); prepareInput(conf, CONF_INPUT_DIR, example, CONF_N); BSPJob job = createHelloHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR); long startTime = System.currentTimeMillis(); if (job.waitForCompletion(true)) { LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); // Print input files // printOutput(job, CONF_INPUT_DIR); // printOutput(job, example); // Print output printOutput(job, FileOutputFormat.getOutputPath(job)); } }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBenchmark.java
License:Apache License
@Override protected void setUp() throws Exception { m_conf = new Configuration(); // Try to load Hadoop configuration String HADOOP_HOME = System.getenv("HADOOP_HOME"); String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL"); if ((HADOOP_HOME != null) || (HADOOP_INSTALL != null) && (!m_runLocally)) { String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL); m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml")); m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml")); m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml")); m_conf.addResource(new Path(HADOOP, "conf/core-site.xml")); m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml")); m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml")); // System.out.println("Loaded Hadoop configuration from " + HADOOP); try {// w ww . j ava2s. c om // Connect to HDFS Filesystem FileSystem.get(m_conf); } catch (Exception e) { // HDFS not reachable run Benchmark locally m_conf = new Configuration(); m_runLocally = true; } // System.out.println("Run Benchmark local: " + m_runLocally); } // Try to load Hama configuration String HAMA_HOME = System.getenv("HAMA_HOME"); String HAMA_INSTALL = System.getenv("HAMA_INSTALL"); if ((HAMA_HOME != null) || (HAMA_INSTALL != null) && (!m_runLocally)) { String HAMA = ((HAMA_HOME != null) ? 
HAMA_HOME : HAMA_INSTALL); m_conf.addResource(new Path(HAMA, "conf/hama-default.xml")); m_conf.addResource(new Path(HAMA, "conf/hama-site.xml")); // System.out.println("Loaded Hama configuration from " + HAMA); } // Setup KMeans config variables m_conf.setBoolean(KMeansHybridBSP.CONF_DEBUG, false); m_conf.setBoolean("hama.pipes.logging", false); m_conf.setBoolean(KMeansHybridBSP.CONF_TIME, false); // Set GPU blockSize and gridSize m_conf.set(KMeansHybridBSP.CONF_BLOCKSIZE, "" + BLOCK_SIZE); m_conf.set(KMeansHybridBSP.CONF_GRIDSIZE, "" + GRID_SIZE); // Set maxIterations for KMeans m_conf.setInt(KMeansHybridBSP.CONF_MAX_ITERATIONS, maxIteration); // Set n for KMeans m_conf.setLong(KMeansHybridBSP.CONF_N, n); // Set GPUPercentage m_conf.setInt(KMeansHybridBSP.CONF_GPU_PERCENTAGE, GPUWorkload); Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq"); Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq"); m_conf.set(KMeansHybridBSP.CONF_CENTER_IN_PATH, centerIn.toString()); m_conf.set(KMeansHybridBSP.CONF_CENTER_OUT_PATH, centerOut.toString()); // CPU vs GPU benchmark // Plot 1 and 2 int numGpuBspTask = 0; // if (type == CalcType.GPU) { // bspTaskNum = 1; // numGpuBspTask = 1; // GPUWorkload = 100; // } // CPU + GPU Hybrid benchmark // Plot 3 if (bspTaskNum == maxTaskNum) { numGpuBspTask = 1; GPUWorkload = 75; } else { numGpuBspTask = 0; } // Set CPU tasks m_conf.setInt("bsp.peers.num", bspTaskNum); // Set GPU tasks m_conf.setInt("bsp.peers.gpu.num", numGpuBspTask); // Generate input data KMeansHybridBSP.prepareInputData(m_conf, FileSystem.get(m_conf), CONF_INPUT_DIR, centerIn, bspTaskNum, numGpuBspTask, n, k, vectorDimension, null, GPUWorkload); // Debug output // System.out.println("CalcType: " + type); System.out.println("CONF_TMP_DIR: " + CONF_TMP_DIR.toString()); System.out.println("NumBspTask: " + m_conf.getInt("bsp.peers.num", 0) + " NumGpuBspTask: " + m_conf.getInt("bsp.peers.gpu.num", 0)); System.out.println("n: " + n + " k: " + k + " vectorDimension: 
" + vectorDimension + " maxIteration: " + maxIteration + " GPUWorkload: " + GPUWorkload + "%"); }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception { // Defaults//w ww .ja va 2s . c o m int numBspTask = 1; int numGpuBspTask = 1; int blockSize = BLOCK_SIZE; int gridSize = GRID_SIZE; long n = 10; // input vectors int k = 3; // start vectors int vectorDimension = 2; int maxIteration = 10; boolean useTestExampleInput = false; boolean isDebugging = false; boolean timeMeasurement = false; int GPUPercentage = 80; Configuration conf = new HamaConfiguration(); FileSystem fs = FileSystem.get(conf); // Set numBspTask to maxTasks // BSPJobClient jobClient = new BSPJobClient(conf); // ClusterStatus cluster = jobClient.getClusterStatus(true); // numBspTask = cluster.getMaxTasks(); if (args.length > 0) { if (args.length == 12) { numBspTask = Integer.parseInt(args[0]); numGpuBspTask = Integer.parseInt(args[1]); blockSize = Integer.parseInt(args[2]); gridSize = Integer.parseInt(args[3]); n = Long.parseLong(args[4]); k = Integer.parseInt(args[5]); vectorDimension = Integer.parseInt(args[6]); maxIteration = Integer.parseInt(args[7]); useTestExampleInput = Boolean.parseBoolean(args[8]); GPUPercentage = Integer.parseInt(args[9]); isDebugging = Boolean.parseBoolean(args[10]); timeMeasurement = Boolean.parseBoolean(args[11]); } else { System.out.println("Wrong argument size!"); System.out.println(" Argument1=numBspTask"); System.out.println(" Argument2=numGpuBspTask"); System.out.println(" Argument3=blockSize"); System.out.println(" Argument4=gridSize"); System.out.println(" Argument5=n | Number of input vectors (" + n + ")"); System.out.println(" Argument6=k | Number of start vectors (" + k + ")"); System.out.println( " Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")"); System.out.println( " Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")"); System.out.println(" Argument9=testExample | Use testExample input (true|false=default)"); System.out.println(" Argument10=GPUPercentage (percentage of input)"); 
System.out.println(" Argument11=isDebugging (true|false=defaul)"); System.out.println(" Argument12=timeMeasurement (true|false=defaul)"); return; } } // Set config variables conf.setBoolean(CONF_DEBUG, isDebugging); conf.setBoolean("hama.pipes.logging", false); conf.setBoolean(CONF_TIME, timeMeasurement); // Set CPU tasks conf.setInt("bsp.peers.num", numBspTask); // Set GPU tasks conf.setInt("bsp.peers.gpu.num", numGpuBspTask); // Set GPU blockSize and gridSize conf.set(CONF_BLOCKSIZE, "" + blockSize); conf.set(CONF_GRIDSIZE, "" + gridSize); // Set maxIterations for KMeans conf.setInt(CONF_MAX_ITERATIONS, maxIteration); // Set n for KMeans conf.setLong(CONF_N, n); // Set GPU workload conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage); LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0)); LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0)); LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum")); LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE)); LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE)); LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE)); LOG.info("isDebugging: " + conf.get(CONF_DEBUG)); LOG.info("timeMeasurement: " + conf.get(CONF_TIME)); LOG.info("useTestExampleInput: " + useTestExampleInput); LOG.info("inputPath: " + CONF_INPUT_DIR); LOG.info("centersPath: " + CONF_CENTER_DIR); LOG.info("outputPath: " + CONF_OUTPUT_DIR); LOG.info("n: " + n); LOG.info("k: " + k); LOG.info("vectorDimension: " + vectorDimension); LOG.info("maxIteration: " + maxIteration); Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq"); Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq"); conf.set(CONF_CENTER_IN_PATH, centerIn.toString()); conf.set(CONF_CENTER_OUT_PATH, centerOut.toString()); // prepare Input if (useTestExampleInput) { // prepareTestInput(conf, fs, input, centerIn); prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension, null, GPUPercentage); } else { prepareInputData(conf, 
fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension, new Random(3337L), GPUPercentage); } BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR); long startTime = System.currentTimeMillis(); if (job.waitForCompletion(true)) { LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); if (isDebugging) { printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get()); printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable()); } if (k < 50) { printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get()); } } }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/** * prepareInputData/*from ww w . jav a2 s .c om*/ * */ public static void prepareInputData(Configuration conf, FileSystem fs, Path in, Path centerIn, int numBspTask, int numGPUBspTask, long n, int k, int vectorDimension, Random rand, int GPUPercentage) throws IOException { // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true); } if (fs.exists(centerIn)) { fs.delete(centerIn, true); } final NullWritable nullValue = NullWritable.get(); final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn, PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); // Compute work distributions int cpuTaskNum = numBspTask - numGPUBspTask; long inputVectorsPerGPUTask = 0; long inputVectorsPerCPU = 0; long inputVectorsPerCPUTask = 0; if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) { inputVectorsPerGPUTask = (n * GPUPercentage) / 100; inputVectorsPerCPU = n - inputVectorsPerGPUTask; } else { inputVectorsPerCPU = n; } if (cpuTaskNum > 0) { inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum; } // long interval = totalNumberOfPoints / numBspTask; long centers = 0; for (int part = 0; part < numBspTask; part++) { Path partIn = new Path(in, "part" + part + ".seq"); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); long interval = 0; if (part > cpuTaskNum) { interval = inputVectorsPerGPUTask; } else { interval = inputVectorsPerCPUTask; } long start = interval * part; long end = start + interval - 1; if ((numBspTask - 1) == part) { end = n; // set to totalNumberOfPoints } LOG.info("Partition " + part + ": from " + start + " to " + end); for (long i = start; i <= end; i++) { double[] arr = new double[vectorDimension]; for (int j = 0; j < vectorDimension; j++) { if (rand != null) { arr[j] = rand.nextInt((int) n); } else { arr[j] = i; } } PipesVectorWritable vector = new PipesVectorWritable(new 
DenseDoubleVector(arr)); // LOG.info("input[" + i + "]: " + Arrays.toString(arr)); dataWriter.append(vector, nullValue); if (k > centers) { // LOG.info("center[" + i + "]: " + Arrays.toString(arr)); centerWriter.append(vector, nullValue); centers++; } else { centerWriter.close(); } } dataWriter.close(); } }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java
License:Apache License
@Override protected void setUp() throws Exception { m_conf = new Configuration(); // Try to load Hadoop configuration String HADOOP_HOME = System.getenv("HADOOP_HOME"); String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL"); if ((HADOOP_HOME != null) || (HADOOP_INSTALL != null) && (!m_runLocally)) { String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL); m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml")); m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml")); m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml")); m_conf.addResource(new Path(HADOOP, "conf/core-site.xml")); m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml")); m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml")); // System.out.println("Loaded Hadoop configuration from " + HADOOP); try {/* w w w.j a va 2 s. co m*/ // Connect to HDFS Filesystem FileSystem.get(m_conf); } catch (Exception e) { // HDFS not reachable run Benchmark locally m_conf = new Configuration(); m_runLocally = true; } // System.out.println("Run Benchmark local: " + m_runLocally); } // Try to load Hama configuration String HAMA_HOME = System.getenv("HAMA_HOME"); String HAMA_INSTALL = System.getenv("HAMA_INSTALL"); if ((HAMA_HOME != null) || (HAMA_INSTALL != null) && (!m_runLocally)) { String HAMA = ((HAMA_HOME != null) ? 
HAMA_HOME : HAMA_INSTALL); m_conf.addResource(new Path(HAMA, "conf/hama-default.xml")); m_conf.addResource(new Path(HAMA, "conf/hama-site.xml")); // System.out.println("Loaded Hama configuration from " + HAMA); } // Setup outputs m_OUTPUT_DIR_PATH = new Path(OUTPUT_DIR + "/bench_" + System.currentTimeMillis()); System.out.println("OUTPUT_DIR_PATH: " + m_OUTPUT_DIR_PATH); m_MATRIX_A_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixA.seq"); m_MATRIX_B_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixB.seq"); m_MATRIX_C_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixC.seq"); m_MATRIX_D_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixD.seq"); m_blockSize = MatrixMultiplicationHybridBSP.BLOCK_SIZE; m_gridSize = MatrixMultiplicationHybridBSP.GRID_SIZE; System.out.println("Benchmark MatrixMultiplication " + type + " [blockSize=" + m_blockSize + ",gridSize=" + m_gridSize + "] " + n + " x " + n + " matrix"); // Create random DistributedRowMatrix DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(42L), m_MATRIX_A_PATH, false); DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(1337L), m_MATRIX_B_PATH, (type == CalcType.CPU) ? true : false); // Load DistributedRowMatrix a and b m_matrixA = new DistributedRowMatrix(m_MATRIX_A_PATH, m_OUTPUT_DIR_PATH, n, n); m_matrixB = new DistributedRowMatrix(m_MATRIX_B_PATH, m_OUTPUT_DIR_PATH, n, n); m_matrixA.setConf(m_conf); m_matrixB.setConf(m_conf); }