List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception { // Defaults/*from www .ja va2 s . c o m*/ int numBspTask = 1; int numGpuBspTask = 1; int blockSize = BLOCK_SIZE; int gridSize = GRID_SIZE; long n = 10; // input vectors int k = 3; // start vectors int vectorDimension = 2; int maxIteration = 10; boolean useTestExampleInput = false; boolean isDebugging = false; boolean timeMeasurement = false; int GPUPercentage = 80; Configuration conf = new HamaConfiguration(); FileSystem fs = FileSystem.get(conf); // Set numBspTask to maxTasks // BSPJobClient jobClient = new BSPJobClient(conf); // ClusterStatus cluster = jobClient.getClusterStatus(true); // numBspTask = cluster.getMaxTasks(); if (args.length > 0) { if (args.length == 12) { numBspTask = Integer.parseInt(args[0]); numGpuBspTask = Integer.parseInt(args[1]); blockSize = Integer.parseInt(args[2]); gridSize = Integer.parseInt(args[3]); n = Long.parseLong(args[4]); k = Integer.parseInt(args[5]); vectorDimension = Integer.parseInt(args[6]); maxIteration = Integer.parseInt(args[7]); useTestExampleInput = Boolean.parseBoolean(args[8]); GPUPercentage = Integer.parseInt(args[9]); isDebugging = Boolean.parseBoolean(args[10]); timeMeasurement = Boolean.parseBoolean(args[11]); } else { System.out.println("Wrong argument size!"); System.out.println(" Argument1=numBspTask"); System.out.println(" Argument2=numGpuBspTask"); System.out.println(" Argument3=blockSize"); System.out.println(" Argument4=gridSize"); System.out.println(" Argument5=n | Number of input vectors (" + n + ")"); System.out.println(" Argument6=k | Number of start vectors (" + k + ")"); System.out.println( " Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")"); System.out.println( " Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")"); System.out.println(" Argument9=testExample | Use testExample input (true|false=default)"); System.out.println(" Argument10=GPUPercentage (percentage of input)"); 
System.out.println(" Argument11=isDebugging (true|false=defaul)"); System.out.println(" Argument12=timeMeasurement (true|false=defaul)"); return; } } // Set config variables conf.setBoolean(CONF_DEBUG, isDebugging); conf.setBoolean("hama.pipes.logging", false); conf.setBoolean(CONF_TIME, timeMeasurement); // Set CPU tasks conf.setInt("bsp.peers.num", numBspTask); // Set GPU tasks conf.setInt("bsp.peers.gpu.num", numGpuBspTask); // Set GPU blockSize and gridSize conf.set(CONF_BLOCKSIZE, "" + blockSize); conf.set(CONF_GRIDSIZE, "" + gridSize); // Set maxIterations for KMeans conf.setInt(CONF_MAX_ITERATIONS, maxIteration); // Set n for KMeans conf.setLong(CONF_N, n); // Set GPU workload conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage); LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0)); LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0)); LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum")); LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE)); LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE)); LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE)); LOG.info("isDebugging: " + conf.get(CONF_DEBUG)); LOG.info("timeMeasurement: " + conf.get(CONF_TIME)); LOG.info("useTestExampleInput: " + useTestExampleInput); LOG.info("inputPath: " + CONF_INPUT_DIR); LOG.info("centersPath: " + CONF_CENTER_DIR); LOG.info("outputPath: " + CONF_OUTPUT_DIR); LOG.info("n: " + n); LOG.info("k: " + k); LOG.info("vectorDimension: " + vectorDimension); LOG.info("maxIteration: " + maxIteration); Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq"); Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq"); conf.set(CONF_CENTER_IN_PATH, centerIn.toString()); conf.set(CONF_CENTER_OUT_PATH, centerOut.toString()); // prepare Input if (useTestExampleInput) { // prepareTestInput(conf, fs, input, centerIn); prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension, null, GPUPercentage); } else { prepareInputData(conf, 
fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension, new Random(3337L), GPUPercentage); } BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR); long startTime = System.currentTimeMillis(); if (job.waitForCompletion(true)) { LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); if (isDebugging) { printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get()); printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable()); } if (k < 50) { printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get()); } } }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java
License:Apache License
@Override protected void setUp() throws Exception { m_conf = new Configuration(); // Try to load Hadoop configuration String HADOOP_HOME = System.getenv("HADOOP_HOME"); String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL"); if ((HADOOP_HOME != null) || (HADOOP_INSTALL != null) && (!m_runLocally)) { String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL); m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml")); m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml")); m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml")); m_conf.addResource(new Path(HADOOP, "conf/core-site.xml")); m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml")); m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml")); // System.out.println("Loaded Hadoop configuration from " + HADOOP); try {// ww w . j a v a 2 s. c o m // Connect to HDFS Filesystem FileSystem.get(m_conf); } catch (Exception e) { // HDFS not reachable run Benchmark locally m_conf = new Configuration(); m_runLocally = true; } // System.out.println("Run Benchmark local: " + m_runLocally); } // Try to load Hama configuration String HAMA_HOME = System.getenv("HAMA_HOME"); String HAMA_INSTALL = System.getenv("HAMA_INSTALL"); if ((HAMA_HOME != null) || (HAMA_INSTALL != null) && (!m_runLocally)) { String HAMA = ((HAMA_HOME != null) ? 
HAMA_HOME : HAMA_INSTALL); m_conf.addResource(new Path(HAMA, "conf/hama-default.xml")); m_conf.addResource(new Path(HAMA, "conf/hama-site.xml")); // System.out.println("Loaded Hama configuration from " + HAMA); } // Setup outputs m_OUTPUT_DIR_PATH = new Path(OUTPUT_DIR + "/bench_" + System.currentTimeMillis()); System.out.println("OUTPUT_DIR_PATH: " + m_OUTPUT_DIR_PATH); m_MATRIX_A_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixA.seq"); m_MATRIX_B_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixB.seq"); m_MATRIX_C_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixC.seq"); m_MATRIX_D_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixD.seq"); m_blockSize = MatrixMultiplicationHybridBSP.BLOCK_SIZE; m_gridSize = MatrixMultiplicationHybridBSP.GRID_SIZE; System.out.println("Benchmark MatrixMultiplication " + type + " [blockSize=" + m_blockSize + ",gridSize=" + m_gridSize + "] " + n + " x " + n + " matrix"); // Create random DistributedRowMatrix DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(42L), m_MATRIX_A_PATH, false); DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(1337L), m_MATRIX_B_PATH, (type == CalcType.CPU) ? true : false); // Load DistributedRowMatrix a and b m_matrixA = new DistributedRowMatrix(m_MATRIX_A_PATH, m_OUTPUT_DIR_PATH, n, n); m_matrixB = new DistributedRowMatrix(m_MATRIX_B_PATH, m_OUTPUT_DIR_PATH, n, n); m_matrixA.setConf(m_conf); m_matrixB.setConf(m_conf); }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java
License:Apache License
@Override protected void tearDown() throws Exception { verify();/*from w ww .j a v a 2 s . c om*/ // Cleanup FileSystem fs = FileSystem.get(m_conf); fs.delete(m_MATRIX_A_PATH, true); fs.delete(m_MATRIX_B_PATH, true); fs.delete(m_MATRIX_C_PATH, true); fs.delete(m_MATRIX_D_PATH, true); printOutput(m_conf); }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java
License:Apache License
static void printOutput(Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] files = fs.listStatus(new Path(OUTPUT_DIR)); for (int i = 0; i < files.length; i++) { if (files[i].getLen() > 0) { System.out.println("File " + files[i].getPath()); FSDataInputStream in = fs.open(files[i].getPath()); IOUtils.copyBytes(in, System.out, conf, false); in.close();/*from w w w . j av a 2 s . co m*/ } } // fs.delete(FileOutputFormat.getOutputPath(job), true); }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java
License:Apache License
/********************************* CPU *********************************/ @Override/* w ww. ja v a2 s . c o m*/ public void setup( BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer) throws IOException { HamaConfiguration conf = peer.getConfiguration(); this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false); // Choose one as a master, who sorts the matrix rows at the end // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2); // TODO // task must be 0 otherwise write out does NOT work! this.m_masterTask = peer.getPeerName(0); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } // Load transposed Matrix B SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf); IntWritable bKey = new IntWritable(); PipesVectorWritable bVector = new PipesVectorWritable(); // for each col of matrix B (cause by transposed B) while (reader.next(bKey, bVector)) { m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector())); if (m_isDebuggingEnabled) { m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value=" + bVector.getVector().toString() + "\n"); } } reader.close(); }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java
License:Apache License
/********************************* GPU *********************************/ @Override/*from w w w.j a v a 2s . com*/ public void setupGpu( BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer) throws IOException, SyncException, InterruptedException { HamaConfiguration conf = peer.getConfiguration(); this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false); // Choose one as a master, who sorts the matrix rows at the end // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2); // TODO // task must be 0 otherwise write out does NOT work! this.m_masterTask = peer.getPeerName(0); this.m_blockSize = Integer.parseInt(peer.getConfiguration().get(CONF_BLOCKSIZE)); this.m_gridSize = Integer.parseInt(peer.getConfiguration().get(CONF_GRIDSIZE)); // Init logging if (m_isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(conf); m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf)) + "/BSP_" + peer.getTaskId() + ".log")); } catch (IOException e) { e.printStackTrace(); } } }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
public static DenseDoubleMatrix readDistributedRowMatrix(Configuration conf, Path path) { // System.out.println("readDistributedRowMatrix: " + path); List<DoubleVector> matrix = new ArrayList<DoubleVector>(); SequenceFile.Reader reader = null; try {/*w ww. j a va 2s .c o m*/ FileSystem fs = FileSystem.get(conf); reader = new SequenceFile.Reader(fs, path, conf); IntWritable key = new IntWritable(); PipesVectorWritable vector = new PipesVectorWritable(); while (reader.next(key, vector)) { // System.out.println("readDistributedRowMatrix: key: " + key // + Arrays.toString(vector.getVector().toArray())); matrix.add(vector.getVector()); } reader.close(); if (matrix.size() > 0) { DoubleVector list[] = new DoubleVector[matrix.size()]; DenseDoubleMatrix result = new DenseDoubleMatrix(matrix.toArray(list)); return result; } return null; } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } return null; }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
public static void writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns, Path path, boolean saveTransposed) { SequenceFile.Writer writer = null; try {/* w ww . j a va 2 s . c o m*/ FileSystem fs = FileSystem.get(conf); writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, PipesVectorWritable.class); if (saveTransposed) { // Transpose Matrix before saving double[][] transposed = new double[columns][rows]; for (int i = 0; i < rows; i++) { for (int j = 0; j < columns; j++) { transposed[j][i] = matrix[i][j]; } } matrix = transposed; } for (int i = 0; i < matrix.length; i++) { DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]); writer.append(new IntWritable(i), new PipesVectorWritable(rowVector)); } } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) { try { writer.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License:Apache License
/**
 * Qualifies outPathString against the configured filesystem and stores it
 * as the temporary output base path. On I/O failure the previous value is
 * kept and the error is logged.
 *
 * Fix: the original message mixed SLF4J-style "{}" placeholders with plain
 * string concatenation, so the literal braces were printed and the values
 * were appended after them; the message is now plain concatenation, and the
 * exception is passed as the log cause instead of being dropped.
 */
public void setOutputTempPathString(String outPathString) {
    try {
        outputTmpBasePath = FileSystem.get(conf).makeQualified(new Path(outPathString));
    } catch (IOException ioe) {
        LOG.error("Unable to set outputBasePath to " + outPathString + ", leaving as " + outputTmpBasePath,
                ioe);
    }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License:Apache License
public static DenseDoubleMatrix readDistributedRowMatrix(Configuration conf, Path path) { // System.out.println("readDistributedRowMatrix: " + path); List<DoubleVector> matrix = new ArrayList<DoubleVector>(); SequenceFile.Reader reader = null; try {//from w w w. jav a 2 s .co m FileSystem fs = FileSystem.get(conf); reader = new SequenceFile.Reader(fs, path, conf); IntWritable key = new IntWritable(); VectorWritable vector = new VectorWritable(); while (reader.next(key, vector)) { // System.out.println("readDistributedRowMatrix: key: " + key // + Arrays.toString(vector.getVector().toArray())); matrix.add(vector.getVector()); } reader.close(); if (matrix.size() > 0) { DoubleVector list[] = new DoubleVector[matrix.size()]; DenseDoubleMatrix result = new DenseDoubleMatrix(matrix.toArray(list)); return result; } return null; } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } return null; }