Example usage for org.apache.hadoop.conf Configuration setLong

List of usage examples for org.apache.hadoop.conf Configuration setLong

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.setLong, taken from a variety of open source projects.

Prototype

public void setLong(String name, long value) 

Document

Set the value of the name property to a long.
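
A minimal, self-contained sketch of the call and its getLong counterpart (the property name and values below are illustrative, not taken from any of the examples on this page):

import org.apache.hadoop.conf.Configuration;

public class SetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a long-valued property.
        conf.setLong("my.app.max.bytes", 128L * 1024 * 1024);
        // Read it back; the second argument is returned if the property is unset.
        long maxBytes = conf.getLong("my.app.max.bytes", 0L);
        System.out.println("my.app.max.bytes = " + maxBytes);
    }
}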

Usage

From source file:ParascaleFsTestCase.java

License:Apache License

/**
 * Creates a new Hadoop Configuration object.
 *
 * @return a new Hadoop configuration object
 *
 * @see Configuration
 */
protected Configuration getConf() {
    final Configuration conf = new Configuration();
    if (setDefaultBlockSize) {
        conf.setLong(RawParascaleFileSystem.PS_DEFAULT_BLOCKSIZE, defaultBlockSize);
    }
    if (setDefaultReplication) {
        conf.setLong(RawParascaleFileSystem.PS_DEFAULT_REPLICATION, defaultReplication);
    }
    if (setMountPoint) {
        conf.set(RawParascaleFileSystem.PS_MOUNT_POINT, String.format("%s/%s", getTempDir(), mountPoint));
    }
    if (setDefaultFsName) {
        conf.set(FS_DEFAULT_NAME, String.format("%s%s@%s", fsScheme, virtualFs, controlNode));
    }

    return conf;
}

From source file:PairsPMI_M.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    // First MapReduce Job

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - tmp path: " + outputPath + "/tmp");
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the tmp directory if it exists already
    Path tmpDir = new Path("tmp_wj");
    FileSystem.get(getConf()).delete(tmpDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, tmpDir);

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    //    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time1 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + time1 + " seconds");
    numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

    /*
     *  Second MapReduce Job
     */

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info("second stage of MapReduce");
    LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj");
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    // Pass the record count from the first job to the second job via the configuration
    Configuration conf = getConf();
    conf.setLong("numRec", numRecords);

    job = Job.getInstance(conf);
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    // job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(FloatWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    //   job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapperSecond.class);
    //    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducerSecond.class);
    job.setPartitionerClass(MyPartitioner.class);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time2 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Second job finished in " + time2 + " seconds");
    System.out.println("Total time: " + (time1 + time2) + " seconds");

    return 0;
}
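
The "numRec" value stored above with setLong is meant to be read back by the second-stage tasks. A hedged consumer-side sketch (the setup method and field name are illustrative; the original MyReducerSecond is not shown on this page):

@Override
protected void setup(Context context) {
    // Recover the record count written by the driver with conf.setLong("numRec", numRecords).
    // The fallback of 1 is an assumption to guard against a missing key.
    numRecords = context.getConfiguration().getLong("numRec", 1L);
}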

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=n | Number of input vectors (" + n + ")");
            System.out.println("    Argument6=k | Number of start vectors (" + k + ")");
            System.out.println(
                    "    Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
            System.out.println(
                    "    Argument8=maxIterations | Maximum number of iterations (" + maxIteration + ")");
            System.out.println("    Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println("    Argument10=GPUPercentage (percentage of input)");
            System.out.println("    Argument11=isDebugging (true|false=defaul)");
            System.out.println("    Argument12=timeMeasurement (true|false=defaul)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);

    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // prepare Input
    if (useTestExampleInput) {
        // prepareTestInput(conf, fs, input, centerIn);
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                null, GPUPercentage);
    } else {
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                new Random(3337L), GPUPercentage);
    }

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }

        if (k < 50) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
        }
    }
}
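
On the task side, the n stored with conf.setLong(CONF_N, n) would typically be recovered from the peer's configuration. A hedged fragment (the peer variable and the default of 0 are assumptions, not copied from KMeansHybridBSP):

// Inside the BSP task (sketch): read n back using the same CONF_N constant as the driver.
long n = peer.getConfiguration().getLong(CONF_N, 0L);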

From source file:backup.integration.MiniClusterTestBase.java

License:Apache License

private Configuration setupConfig(File hdfsDir) throws Exception {
    Configuration conf = new Configuration();
    File backup = new File(tmpHdfs, "backup");
    backup.mkdirs();
    conf.set(DFS_BACKUP_NAMENODE_LOCAL_DIR_KEY, backup.getAbsolutePath());
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.getAbsolutePath());
    conf.set(DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY, BackupFsDatasetSpiFactory.class.getName());
    conf.set(DFSConfigKeys.DFS_DATANODE_PLUGINS_KEY, DataNodeBackupServicePlugin.class.getName());
    conf.set(DFSConfigKeys.DFS_NAMENODE_PLUGINS_KEY, NameNodeBackupServicePlugin.class.getName());

    conf.setInt(BackupConstants.DFS_BACKUP_DATANODE_RPC_PORT_KEY, 0);
    conf.setInt(BackupConstants.DFS_BACKUP_NAMENODE_HTTP_PORT_KEY, 0);

    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 2); // default is 3
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_KEY, 2); // default is 3
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 6000); // default is 30000
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 6000); // default is 5*60*1000

    org.apache.commons.configuration.Configuration configuration = BackupUtil.convert(conf);
    setupBackupStore(configuration);
    @SuppressWarnings("unchecked")
    Iterator<String> keys = configuration.getKeys();
    while (keys.hasNext()) {
        String key = keys.next();
        conf.set(key, configuration.getString(key));
    }

    return conf;
}

From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License:Apache License

private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;

    System.out
            .println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);

    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);

    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }

    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }

    runJob(job, info);

    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
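
Because the driver only calls conf.setLong(NUMBER_OF_LINES_KEY, nrLines) when nrLines != -1, a consumer should supply the same sentinel as the default. A hedged mapper-side fragment (the context variable is assumed, not taken from AprioriPhaseMapper):

// Sketch: recover the line count, falling back to -1 when the driver did not set it.
long nrLines = context.getConfiguration().getLong(NUMBER_OF_LINES_KEY, -1L);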

From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java

License:Apache License

private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}

From source file:be.ugent.intec.halvade.utils.HalvadeConf.java

License:Open Source License

public static void setRefSize(Configuration conf, long val) {
    conf.setLong(refSize, val);
}
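
A hedged companion fragment (only the setter appears in this example; a matching getter is assumed here purely for illustration and uses the same refSize key from within the class):

// Sketch: read the reference size back; the 0 default is an assumption.
public static long getRefSize(Configuration conf) {
    return conf.getLong(refSize, 0L);
}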

From source file:boa.runtime.BoaRunner.java

License:Apache License

/**
 * Create a {@link Job} describing the work to be done by this Boa job.
 *
 * @param ins
 *            An array of {@link Path} containing the locations of the input
 *            files
 * 
 * @param out
 *            A {@link Path} containing the location of the output file
 * 
 * @param robust
 *            A boolean representing whether the job should ignore most
 *            exceptions
 * 
 * @return A {@link Job} describing the work to be done by this Boa job
 * @throws IOException
 */
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
    final Configuration configuration = getConf();

    configuration.setBoolean("boa.runtime.robust", robust);

    // faster local reads
    configuration.setBoolean("dfs.client.read.shortcircuit", true);
    configuration.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);

    // by default our MapFiles index every key, which takes up
    // a lot of memory - this lets you skip keys in the index and
    // control the memory requirements (as a tradeoff of slower gets)
    //configuration.setLong("io.map.index.skip", 128);

    // map output compression
    configuration.setBoolean("mapred.compress.map.output", true);
    configuration.set("mapred.map.output.compression.type", "BLOCK");
    configuration.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    configuration.setLong("mapred.job.reuse.jvm.num.tasks", -1);

    final Job job = new Job(configuration);

    if (ins != null)
        for (final Path in : ins)
            FileInputFormat.addInputPath(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(BoaPartitioner.class);

    job.setMapOutputKeyClass(EmitKey.class);
    job.setMapOutputValueClass(EmitValue.class);

    job.setOutputFormatClass(BoaOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job;
}
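
A hedged driver sketch for calling job() (the runner instance and HDFS paths are illustrative, not taken from the Boa sources):

// Build the job from one input path and an output path, tolerating most exceptions,
// then submit it and wait for completion.
final Job job = runner.job(new Path[] { new Path("/boa/input.seq") }, new Path("/boa/output"), true);
job.waitForCompletion(true);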

From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java

License:Apache License

/** Create and setup a job */
@SuppressWarnings("deprecation")
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}

From source file:co.cask.cdap.data.stream.AbstractStreamInputFormat.java

License:Apache License

/**
 * Sets the TTL for the stream events.
 *
 * @param conf  The configuration to modify
 * @param ttl TTL of the stream in milliseconds.
 */
public static void setTTL(Configuration conf, long ttl) {
    Preconditions.checkArgument(ttl >= 0, "TTL must be >= 0");
    conf.setLong(STREAM_TTL, ttl);
}
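
A hedged caller-side fragment (the 24-hour value is illustrative; the method itself is the public static setTTL shown above):

// TTL must be expressed in milliseconds and must be >= 0.
AbstractStreamInputFormat.setTTL(conf, 24 * 60 * 60 * 1000L); // 24 hours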