Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs FileSystem get

Introduction

On this page you can find usage examples for the get method of org.apache.hadoop.fs.FileSystem.

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Source Link

Document

Returns the configured FileSystem implementation.

Usage

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/**
 * Command-line entry point of the hybrid KMeans BSP example.
 *
 * <p>With no arguments the defaults declared below are used. Otherwise exactly
 * twelve arguments are expected, in this order: numBspTask, numGpuBspTask,
 * blockSize, gridSize, n, k, vectorDimension, maxIteration,
 * useTestExampleInput, GPUPercentage, isDebugging, timeMeasurement. Any other
 * argument count prints the usage text and returns without running a job.
 *
 * <p>The values are stored in the job configuration, the input data is
 * prepared, the BSP job is run, and on success the resulting centers are
 * printed when debugging is enabled or when {@code k < 50}.
 *
 * @param args either empty or the twelve arguments listed above
 * @throws Exception if parsing a numeric argument, preparing the input or
 *         running the job fails
 */
public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);

        } else {
            // The defaults shown in parentheses are the values declared above
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=n | Number of input vectors (" + n + ")");
            System.out.println("    Argument6=k | Number of start vectors (" + k + ")");
            System.out.println(
                    "    Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
            System.out.println(
                    "    Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println("    Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println("    Argument10=GPUPercentage (percentage of input)");
            System.out.println("    Argument11=isDebugging (true|false=default)");
            System.out.println("    Argument12=timeMeasurement (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);

    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // Prepare input: the test example passes no random generator, the
    // regular run passes a fixed-seed generator
    Random rand = useTestExampleInput ? null : new Random(3337L);
    prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
            rand, GPUPercentage);

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        } else if (k < 50) {
            // Centers are already printed in debug mode; avoid printing them twice
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java

License:Apache License

/**
 * Prepares one benchmark run: loads the Hadoop and Hama configuration when
 * the corresponding environment variables are set and the benchmark is not
 * running locally, derives the output paths, and creates and loads the two
 * random input matrices A and B.
 *
 * <p>If connecting to the configured file system fails, the configuration is
 * reset to a fresh {@link Configuration} and {@code m_runLocally} is set, so
 * that no cluster configuration (including Hama's) is loaded afterwards.
 *
 * @throws Exception if creating the random input matrices fails
 */
@Override
protected void setUp() throws Exception {
    m_conf = new Configuration();

    // Try to load Hadoop configuration
    String HADOOP_HOME = System.getenv("HADOOP_HOME");
    String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL");
    // Parenthesised on purpose: '&&' binds tighter than '||', so without the
    // parentheses the m_runLocally flag was ignored whenever HADOOP_HOME was set
    if (((HADOOP_HOME != null) || (HADOOP_INSTALL != null)) && (!m_runLocally)) {
        String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL);

        m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml"));
        m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml"));
        m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml"));
        m_conf.addResource(new Path(HADOOP, "conf/core-site.xml"));
        m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml"));
        m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml"));
        // System.out.println("Loaded Hadoop configuration from " + HADOOP);

        try {
            // Connect to HDFS Filesystem
            FileSystem.get(m_conf);
        } catch (Exception e) {
            // HDFS not reachable run Benchmark locally
            m_conf = new Configuration();
            m_runLocally = true;
        }
        // System.out.println("Run Benchmark local: " + m_runLocally);
    }

    // Try to load Hama configuration (skipped when running locally)
    String HAMA_HOME = System.getenv("HAMA_HOME");
    String HAMA_INSTALL = System.getenv("HAMA_INSTALL");
    if (((HAMA_HOME != null) || (HAMA_INSTALL != null)) && (!m_runLocally)) {
        String HAMA = ((HAMA_HOME != null) ? HAMA_HOME : HAMA_INSTALL);

        m_conf.addResource(new Path(HAMA, "conf/hama-default.xml"));
        m_conf.addResource(new Path(HAMA, "conf/hama-site.xml"));
        // System.out.println("Loaded Hama configuration from " + HAMA);
    }

    // Setup outputs: each run gets its own timestamped directory
    m_OUTPUT_DIR_PATH = new Path(OUTPUT_DIR + "/bench_" + System.currentTimeMillis());
    System.out.println("OUTPUT_DIR_PATH: " + m_OUTPUT_DIR_PATH);

    m_MATRIX_A_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixA.seq");
    m_MATRIX_B_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixB.seq");
    m_MATRIX_C_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixC.seq");
    m_MATRIX_D_PATH = new Path(m_OUTPUT_DIR_PATH + "/MatrixD.seq");

    m_blockSize = MatrixMultiplicationHybridBSP.BLOCK_SIZE;
    m_gridSize = MatrixMultiplicationHybridBSP.GRID_SIZE;

    System.out.println("Benchmark MatrixMultiplication " + type + " [blockSize=" + m_blockSize + ",gridSize="
            + m_gridSize + "] " + n + " x " + n + " matrix");

    // Create random DistributedRowMatrix
    DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(42L), m_MATRIX_A_PATH,
            false);

    // The last argument is true only for the CPU calculation type
    DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(1337L), m_MATRIX_B_PATH,
            type == CalcType.CPU);

    // Load DistributedRowMatrix a and b
    m_matrixA = new DistributedRowMatrix(m_MATRIX_A_PATH, m_OUTPUT_DIR_PATH, n, n);
    m_matrixB = new DistributedRowMatrix(m_MATRIX_B_PATH, m_OUTPUT_DIR_PATH, n, n);
    m_matrixA.setConf(m_conf);
    m_matrixB.setConf(m_conf);
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java

License:Apache License

/**
 * Finishes one benchmark run: verifies the result, deletes the four matrix
 * files of this run, and finally prints the contents of the output directory.
 *
 * @throws Exception if verification, file deletion or printing fails
 */
@Override
protected void tearDown() throws Exception {

    verify();

    // Cleanup: remove every matrix file that belongs to this run
    final FileSystem fileSystem = FileSystem.get(m_conf);
    final Path[] matrixPaths = { m_MATRIX_A_PATH, m_MATRIX_B_PATH, m_MATRIX_C_PATH, m_MATRIX_D_PATH };
    for (Path matrixPath : matrixPaths) {
        fileSystem.delete(matrixPath, true);
    }

    printOutput(m_conf);
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBenchmark.java

License:Apache License

/**
 * Copies the contents of every non-empty entry listed for {@code OUTPUT_DIR}
 * to standard output, each preceded by a {@code "File <path>"} line.
 *
 * @param conf configuration used to obtain the file system and passed on to
 *        the byte copy
 * @throws IOException if listing, opening, copying or closing a file fails
 */
static void printOutput(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] files = fs.listStatus(new Path(OUTPUT_DIR));
    if (files == null) {
        // NOTE(review): some FileSystem versions appear to return null instead
        // of throwing when the directory does not exist; nothing to print then
        return;
    }

    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        System.out.println("File " + file.getPath());
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // 'false' keeps System.out open after the copy
            IOUtils.copyBytes(in, System.out, conf, false);
        } finally {
            // Close the stream even when the copy fails, so it does not leak
            in.close();
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java

License:Apache License

/********************************* CPU *********************************/
/**
 * Prepares a CPU peer: reads the debug flag, selects peer 0 as master task,
 * optionally opens a per-task log file in the job's output path, and loads
 * all entries of the (transposed) matrix B into {@code m_bColumns}.
 *
 * <p>If the log file cannot be created the error is printed and the setup
 * continues without logging.
 *
 * @param peer the BSP peer this task runs on
 * @throws IOException if matrix B cannot be opened or read
 */
@Override
public void setup(
        BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
        throws IOException {

    HamaConfiguration conf = peer.getConfiguration();
    this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);

    // TODO
    // task must be 0 otherwise write out does NOT work!
    this.m_masterTask = peer.getPeerName(0);

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob(conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            // Logging is best effort; m_logger may stay unset, see the guard below
            e.printStackTrace();
        }
    }

    // Load transposed Matrix B
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);
    try {
        IntWritable bKey = new IntWritable();
        PipesVectorWritable bVector = new PipesVectorWritable();

        // for each col of matrix B (cause by transposed B)
        while (reader.next(bKey, bVector)) {
            m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector()));
            // m_logger is null when creating the log file failed above
            if (m_isDebuggingEnabled && m_logger != null) {
                m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value="
                        + bVector.getVector().toString() + "\n");
            }
        }
    } finally {
        // Release the reader even when reading or logging fails
        reader.close();
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java

License:Apache License

/********************************* GPU *********************************/
/**
 * Prepares a GPU peer: reads the debug flag, selects peer 0 as master task,
 * parses block size and grid size from the configuration and, when debugging
 * is enabled, opens a per-task log file in the job's output path.
 *
 * <p>A failure to create the log file is printed and otherwise ignored.
 *
 * @param peer the BSP peer this task runs on
 * @throws IOException declared by the overridden method
 * @throws SyncException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void setupGpu(
        BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
        throws IOException, SyncException, InterruptedException {

    final HamaConfiguration conf = peer.getConfiguration();
    this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // A master task sorts the matrix rows at the end. An earlier variant
    // picked the middle peer: peer.getPeerName(peer.getNumPeers() / 2)

    // TODO
    // task must be 0 otherwise write out does NOT work!
    this.m_masterTask = peer.getPeerName(0);

    final String blockSizeText = peer.getConfiguration().get(CONF_BLOCKSIZE);
    this.m_blockSize = Integer.parseInt(blockSizeText);

    final String gridSizeText = peer.getConfiguration().get(CONF_GRIDSIZE);
    this.m_gridSize = Integer.parseInt(gridSizeText);

    // Logging is only initialised in debug mode
    if (!m_isDebuggingEnabled) {
        return;
    }

    try {
        final FileSystem fileSystem = FileSystem.get(conf);
        final BSPJob job = new BSPJob(conf);
        final String logFileName = FileOutputFormat.getOutputPath(job) + "/BSP_" + peer.getTaskId() + ".log";
        m_logger = fileSystem.create(new Path(logFileName));
    } catch (IOException ioException) {
        ioException.printStackTrace();
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

/**
 * Reads every vector stored in the sequence file at {@code path} and returns
 * them, in file order, as the rows of a dense matrix. The keys are read but
 * not used.
 *
 * @param conf configuration used to obtain the file system and the reader
 * @param path location of the sequence file to read
 * @return the matrix built from the vectors read, or {@code null} when the
 *         file contains no entries or an {@link IOException} occurred while
 *         reading (the exception is printed)
 */
public static DenseDoubleMatrix readDistributedRowMatrix(Configuration conf, Path path) {

    // System.out.println("readDistributedRowMatrix: " + path);

    List<DoubleVector> matrix = new ArrayList<DoubleVector>();

    SequenceFile.Reader reader = null;
    try {
        FileSystem fs = FileSystem.get(conf);
        reader = new SequenceFile.Reader(fs, path, conf);

        IntWritable key = new IntWritable();
        PipesVectorWritable vector = new PipesVectorWritable();

        while (reader.next(key, vector)) {
            // System.out.println("readDistributedRowMatrix: key: " + key
            // + Arrays.toString(vector.getVector().toArray()));
            matrix.add(vector.getVector());
        }
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        // Single place where the reader is closed (it used to be closed twice)
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    if (matrix.isEmpty()) {
        return null;
    }
    return new DenseDoubleMatrix(matrix.toArray(new DoubleVector[matrix.size()]));
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

/**
 * Writes a matrix to a sequence file at {@code path}, one entry per row: the
 * key is the row index and the value wraps the row as a dense vector. When
 * {@code saveTransposed} is set, the {@code rows x columns} matrix is
 * transposed first and the transposed rows are written instead.
 *
 * <p>An {@link IOException} is printed and not propagated.
 *
 * @param conf configuration used to obtain the file system and the writer
 * @param matrix the values to write
 * @param rows number of rows used when transposing
 * @param columns number of columns used when transposing
 * @param path destination sequence file
 * @param saveTransposed whether to store the transposed matrix
 */
public static void writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns,
        Path path, boolean saveTransposed) {

    SequenceFile.Writer seqWriter = null;
    try {
        final FileSystem fileSystem = FileSystem.get(conf);
        seqWriter = new SequenceFile.Writer(fileSystem, conf, path, IntWritable.class,
                PipesVectorWritable.class);

        // Either the matrix as given or its transpose
        double[][] toStore = matrix;
        if (saveTransposed) {
            toStore = new double[columns][rows];
            for (int r = 0; r < rows; r++) {
                for (int c = 0; c < columns; c++) {
                    toStore[c][r] = matrix[r][c];
                }
            }
        }

        int rowIndex = 0;
        for (double[] rowValues : toStore) {
            final PipesVectorWritable value = new PipesVectorWritable(new DenseDoubleVector(rowValues));
            seqWriter.append(new IntWritable(rowIndex), value);
            rowIndex++;
        }

    } catch (IOException writeFailure) {
        writeFailure.printStackTrace();
    } finally {
        if (seqWriter != null) {
            try {
                seqWriter.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License:Apache License

/**
 * Sets the temporary output base path to {@code outPathString}, qualified
 * against the file system obtained from {@code conf}.
 *
 * <p>If the file system cannot be obtained, the previous base path is kept
 * and the failure is logged together with its cause.
 *
 * @param outPathString the new temporary output base path
 */
public void setOutputTempPathString(String outPathString) {
    try {
        outputTmpBasePath = FileSystem.get(conf).makeQualified(new Path(outPathString));
    } catch (IOException ioe) {
        // The values are concatenated directly; the old message kept unfilled
        // "{}" placeholders and dropped the exception
        LOG.error("Unable to set outputBasePath to " + outPathString + ", leaving as " + outputTmpBasePath,
                ioe);
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License:Apache License

/**
 * Reads every vector stored in the sequence file at {@code path} and returns
 * them, in file order, as the rows of a dense matrix. The keys are read but
 * not used.
 *
 * @param conf configuration used to obtain the file system and the reader
 * @param path location of the sequence file to read
 * @return the matrix built from the vectors read, or {@code null} when the
 *         file contains no entries or an {@link IOException} occurred while
 *         reading (the exception is printed)
 */
public static DenseDoubleMatrix readDistributedRowMatrix(Configuration conf, Path path) {

    // System.out.println("readDistributedRowMatrix: " + path);

    List<DoubleVector> matrix = new ArrayList<DoubleVector>();

    SequenceFile.Reader reader = null;
    try {
        FileSystem fs = FileSystem.get(conf);
        reader = new SequenceFile.Reader(fs, path, conf);

        IntWritable key = new IntWritable();
        VectorWritable vector = new VectorWritable();

        while (reader.next(key, vector)) {
            // System.out.println("readDistributedRowMatrix: key: " + key
            // + Arrays.toString(vector.getVector().toArray()));
            matrix.add(vector.getVector());
        }
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        // Single place where the reader is closed (it used to be closed twice)
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    if (matrix.isEmpty()) {
        return null;
    }
    return new DenseDoubleMatrix(matrix.toArray(new DoubleVector[matrix.size()]));
}