Example usage for org.apache.hadoop.conf Configuration getInt

List of usage examples for org.apache.hadoop.conf Configuration getInt


On this page you can find example usages of org.apache.hadoop.conf.Configuration.getInt.


public int getInt(String name, int defaultValue) 

Source Link


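Get the value of the `name` property as an `int`. If no such property exists, the provided `defaultValue` is returned; if the stored value is not a valid `int`, an error is thrown.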


From source file:BamRecordReader.java

License:Apache License

/**
 * Initializes this reader from the given split and task context.
 *
 * <p>Reads the maximum line length from the job configuration and records the
 * split's start offset, length and path, printing the offset and length to
 * standard output.
 *
 * <p>NOTE(review): this excerpt ends before the method's closing brace, so any
 * later steps are not shown here.
 *
 * @param genericSplit the split to read; cast to {@code FileSplit}
 * @param context task context that supplies the job {@code Configuration}
 * @throws IOException declared by the signature; no visible statement throws it — TODO confirm
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    // Integer.MAX_VALUE is the default passed to getInt when the property is not set.
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    split_length = split.getLength();
    System.out.println("start: " + start);
    System.out.println("split_length: " + split_length);
    fileInfo = split.getPath();
    // The file-opening logic below is disabled; only the split metadata above is recorded.
    //String fileName = fileInfo.toString().split("-")[0];
    //Path file = new Path(fileName);
    //compressionCodecs = new CompressionCodecFactory(job);
    //final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    //FileSystem fs = file.getFileSystem(job);
    //fileIn = fs.open(file);
    //this.pos = start;

From source file:LungDriver.java

License:Creative Commons License

/**
 * Configures and submits the job described by the tool's {@code Configuration}.
 *
 * <p>Prints the author, input path and output path read from the configuration,
 * optionally processes command-line arguments, prints the two nodule-candidate
 * thresholds, cleans the output path, then submits the job and waits for it.
 *
 * <p>NOTE(review): this excerpt has lost lines — the {@code if (args.length != 0)}
 * block is never closed, and the text about the jar file has no comment delimiters.
 *
 * @param args command-line arguments; when non-empty they are passed to {@code processInputs}
 * @return 1 if {@code processInputs} reports a non-zero result or the job does not
 *         complete successfully, 0 otherwise
 * @throws Exception declared by the signature
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    String author = conf.get("com.marcolotz.author");
    String jobName = conf.get("mapreduce.job.name");
    String inputPath = conf.get("mapred.input.dir");
    String outputPath = conf.get("mapred.output.dir");

    System.out.println("\nApplication author: " + author + "\n");

    System.out.println("Configurations stored at: conf/lungConfiguration.xml");
    System.out.println("Input path: " + inputPath);
    System.out.println("Output path: " + outputPath);

    /* For non-standard operation (i.e. with arguments) */
    if (args.length != 0) {

        /* terminates the program if there is an incorrect input */
        if (processInputs(args, conf) != 0) {
            return 1;
        }

    // 110 and 120 are the defaults passed to getInt when the thresholds are not configured.
    System.out.println("Bottom Threshold for nodules candidates detection:"
            + conf.getInt("com.marcolotz.grayNoduleCandidates.bottomThreshold", 110));
    System.out.println("Top Threshold for nodules candidates detection:"
            + conf.getInt("com.marcolotz.grayNoduleCandidates.topThreshold", 120));

    System.out.print("Cleaning output path: ");
    cleanOutputPath(conf, outputPath);

    System.out.print("Configuring the job " + jobName + ": ");

    /* Makes a new job */
    // The classic Job constructor is deprecated.
    Job job = Job.getInstance(conf);

    // NOTE(review): the next two lines look like the body of a block comment whose
    // delimiters (and the statement it described) were lost in extraction.
     * This method sets the jar file in which each node will look for the
     * Mapper and Reducer classes.


    // Submits the job to the cluster
    System.out.println("Distributing the job:");
    return job.waitForCompletion(true) ? 0 : 1;

From source file:TestFSConfig.java

License:Open Source License

/**
 * Reads the segment, unit and pool sizes from a fresh {@code Configuration} and
 * prints them on one line.
 *
 * <p>NOTE(review): this excerpt ends before the method's closing brace.
 *
 * @param argv command-line arguments; not used in the visible lines
 */
public static void main(String[] argv) {
    Configuration conf = new Configuration();

    // Each key gets a different negative default; presumably so the output shows
    // which key was missing — confirm with the author.
    int segmentSize = conf.getInt(ConstVar.ConfSegmentSize, -1);
    int unitSize = conf.getInt(ConstVar.ConfUnitSize, -2);
    int poolSize = conf.getInt(ConstVar.ConfPoolSize, -3);

    System.out.println("seg:" + segmentSize + ",unit:" + unitSize + ",pool:" + poolSize);

From source file:andromache.config.CassandraConfigHelper.java

License:Apache License

public static int getInputSplitSize(Configuration conf) {

From source file:andromache.config.CassandraConfigHelper.java

License:Apache License

/**
 * The number of rows to request with each get range slices request.
 * Too big and you can get timeouts when Cassandra takes too
 * long to fetch all the data. Too small and the performance
 * will be eaten up by the overhead of each request.
 * @param conf Job configuration you are about to run
 * @return Number of rows to request each time
 */
public static int getRangeBatchSize(Configuration conf) {

From source file:andromache.hadoop.CassandraRecordWriter.java

License:Apache License

/**
 * Builds a record writer from the task's configuration.
 *
 * <p>Reads the queue size, batch size, batch delay and write consistency level
 * from the configuration and passes them to a new {@code RangeThreadsCache}.
 *
 * <p>NOTE(review): the {@code CassandraClientFactory} constructor call is cut off
 * in this excerpt, and the closing brace is not shown.
 *
 * @param context task context; kept as {@code progressable} and used to get the configuration
 * @throws IOException declared by the signature
 */
CassandraRecordWriter(TaskAttemptContext context) throws IOException {
    this.progressable = context;
    Configuration conf = context.getConfiguration();
    // Defaults used when the keys are not set: queue 256, batch 32, delay 200.
    int queueSize = conf.getInt(QUEUE_SIZE, 256);
    int batchSize = conf.getInt(BATCH_SIZE, 32);
    long batchDelay = conf.getLong(BATCH_DELAY_MS, 200);
    ConsistencyLevel consistencyLevel = CassandraConfigHelper.getWriteConsistencyLevel(conf);

    CassandraClientFactory cassandraClientFactory = new CassandraClientFactory(

    this.rangeThreadsCache = new RangeThreadsCache(conf, cassandraClientFactory, queueSize, batchSize,
            batchDelay, progressable, consistencyLevel);

    log.info("Using consistency level of {}", consistencyLevel);

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

/**
 * Creates the sequence-file inputs for the job: one input file per BSP task plus
 * a single example file.
 *
 * <p>NOTE(review): this excerpt has lost lines — the loop bodies are not closed,
 * and no visible statement sets {@code inputKey} or uses {@code r}.
 *
 * @param conf configuration; {@code "bsp.peers.num"} sets how many input files are created
 * @param inputPath directory in which the {@code input<i>.seq} files are created
 * @param exampleFile path of the example sequence file
 * @param n number of iterations of the outer write loop
 * @throws IOException declared by the signature
 */
private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
        throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    // A single writer is created when "bsp.peers.num" is not set (default 1).
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        // Keys are IntWritable, values are NullWritable, and compression is off.
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, NullWritable.class,
                CompressionType.NONE);

    // Create example file writer
    SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile, IntWritable.class,
            NullWritable.class, CompressionType.NONE);

    // Write random values to input files and example
    IntWritable inputKey = new IntWritable();
    NullWritable nullValue = NullWritable.get();
    Random r = new Random();
    for (long i = 0; i < n; i++) {
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(inputKey, nullValue);
        exampleWriter.append(inputKey, nullValue);

    // Close file writers
    for (int j = 0; j < inputWriters.length; j++) {

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

/**
 * Entry point of the HelloHybrid example: sets the task counts, prepares the
 * input files, runs the BSP job and prints its output.
 *
 * <p>NOTE(review): several closing braces are missing from this excerpt, so the
 * exact extent of the {@code if}/{@code else} branches cannot be confirmed here.
 *
 * @param args optional; a single argument is parsed as the value of {@code "bsp.peers.num"}
 * @throws InterruptedException declared by the signature
 * @throws IOException declared by the signature
 * @throws ClassNotFoundException declared by the signature
 */
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 1) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
    } else {
        // BSPJobClient jobClient = new BSPJobClient(conf);
        // ClusterStatus cluster = jobClient.getClusterStatus(true);
        // job.setNumBspTask(cluster.getMaxTasks());

        conf.setInt("bsp.peers.num", 2); // 1 CPU and 1 GPU
    // Enable one GPU task
    conf.setInt("bsp.peers.gpu.num", 1);
    conf.setBoolean("hama.pipes.logging", true);

    // The 0 defaults below are only used if the keys are not set.
    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumBspGpuTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    // The example file is placed next to the input directory, in its parent.
    Path example = new Path(CONF_INPUT_DIR.getParent(), "example.seq");
    conf.set(CONF_EXAMPLE_PATH, example.toString());
    LOG.info("exampleFile: " + example.toString());

    prepareInput(conf, CONF_INPUT_DIR, example, CONF_N);

    BSPJob job = createHelloHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    // Elapsed time is measured in milliseconds and logged in seconds.
    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        // Print input files
        // printOutput(job, CONF_INPUT_DIR);
        // printOutput(job, example);

        // Print output
        printOutput(job, FileOutputFormat.getOutputPath(job));

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/**
 * Entry point of the hybrid KMeans example: parses the command line, stores the
 * settings in a {@code HamaConfiguration}, prepares the input data, runs the BSP
 * job and prints the results.
 *
 * <p>NOTE(review): several closing braces are missing from this excerpt, and two
 * usage lines have lost their {@code System.out.println(} opener.
 *
 * @param args either empty (the defaults below are used) or exactly 12 values in
 *        the order listed by the usage message
 * @throws Exception declared by the signature
 */
public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        // Arguments are positional; all 12 must be supplied together.
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=n | Number of input vectors (" + n + ")");
            System.out.println("    Argument6=k | Number of start vectors (" + k + ")");
            // NOTE(review): the next two lines appear to have lost their
            // "System.out.println(" opener during extraction.
                    "    Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
                    "    Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println("    Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println("    Argument10=GPUPercentage (percentage of input)");
            System.out.println("    Argument11=isDebugging (true|false=defaul)");
            System.out.println("    Argument12=timeMeasurement (true|false=defaul)");

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);

    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    // Log the effective settings; the 0 defaults are only used if the keys are not set.
    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // prepare Input
    // The two calls differ only in the Random argument: null for the test
    // example, a fixed seed (3337L) otherwise.
    if (useTestExampleInput) {
        // prepareTestInput(conf, fs, input, centerIn);
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                null, GPUPercentage);
    } else {
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                new Random(3337L), GPUPercentage);

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    // Elapsed time is measured in milliseconds and logged in seconds.
    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());

        // The centers file is printed only when k is below 50.
        if (k < 50) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java

License:Apache License

public static void main(String[] args) throws Exception {

    // Defaults//from w w  w . j ava 2s . c  om
    int numRowsA = 4;// 1024;
    int numColsA = 4;// 1024;
    int numRowsB = 4;// 1024;
    int numColsB = 4;// 1024;
    boolean isDebugging = true;

    Configuration conf = new HamaConfiguration();
    BSPJobClient jobClient = new BSPJobClient(conf);
    ClusterStatus cluster = jobClient.getClusterStatus(true);

    if (args.length > 0) {
        if (args.length == 6) {
            conf.setInt("bsp.peers.num", Integer.parseInt(args[0]));
            numRowsA = Integer.parseInt(args[1]);
            numColsA = Integer.parseInt(args[2]);
            numRowsB = Integer.parseInt(args[3]);
            numColsB = Integer.parseInt(args[4]);
            isDebugging = Boolean.parseBoolean(args[5]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numRowsA | Number of rows of the first input matrix");
            System.out.println("    Argument3=numColsA | Number of columns of the first input matrix");
            System.out.println("    Argument4=numRowsB | Number of rows of the second input matrix");
            System.out.println("    Argument5=numColsB | Number of columns of the second input matrix");
            System.out.println("    Argument6=debug | Enable debugging (true|false)");
    } else {
        conf.setInt("bsp.peers.num", 1); // cluster.getMaxTasks());
        // Enable one GPU task
        conf.setInt("bsp.peers.gpu.num", 1);

    conf.setBoolean("hama.pipes.logging", isDebugging);
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.set(CONF_BLOCKSIZE, "" + BLOCK_SIZE);
    conf.set(CONF_GRIDSIZE, "" + GRID_SIZE);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("outputPath: " + OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new Exception("Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results

    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
            MATRIX_A_PATH, false);
    // Matrix B is stored transposed
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
            MATRIX_B_PATH, true);

    // Load DistributedRowMatrix a and b
    DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA);

    DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);

    // MatrixMultiplication
    long startTime = System.currentTimeMillis();
    DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH);

    LOG.info("MatrixMultiplicationHybrid using Hama finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Verification

    // Overwrite matrix B, NOT transposed for verification
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L),
            MATRIX_B_PATH, false);
    b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB);

    DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH);

    if (c.verify(d)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");

    if (isDebugging) {
        System.out.println("Matrix A:");
        System.out.println("Matrix B:");
        System.out.println("Matrix C:");
        System.out.println("Matrix D:");