Example usage for org.apache.hadoop.mapred JobConf setBoolean

List of usage examples for org.apache.hadoop.mapred JobConf setBoolean

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
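
A minimal, self-contained sketch of the round trip (the property name "example.feature.enabled" is made up for illustration; getBoolean returns the supplied default when the property is unset):

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store a boolean under a hypothetical property name.
        conf.setBoolean("example.feature.enabled", true);
        // Read it back; the second argument is the default used when unset.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("enabled = " + enabled);
    }
}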

Usage

From source file:net.peacesoft.nutch.crawl.ReSolrIndexer.java

License:Apache License

public void indexSolr(String solrUrl, Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit,
        boolean deleteGone, String solrParams, boolean filter, boolean normalize) throws IOException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("SolrIndexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("index-solr " + solrUrl);

    LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
    LOG.info("SolrIndexer: URL filtering: " + filter);
    LOG.info("SolrIndexer: URL normalizing: " + normalize);

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    job.set(ReSolrConstants.SERVER_URL, solrUrl);
    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
    if (solrParams != null) {
        job.set(ReSolrConstants.PARAMS, solrParams);
    }
    NutchIndexWriterFactory.addClassToConf(job, ReSolrWriter.class);

    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());

    FileOutputFormat.setOutputPath(job, tmp);
    try {
        JobClient.runJob(job);
        // do the commits once and for all the reducers in one go
        SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);

        if (!noCommit) {
            solr.commit();
        }
        long end = System.currentTimeMillis();
        LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
                + TimingUtil.elapsedTime(start, end));
    } catch (Exception e) {
        LOG.error(e.toString());
    } finally {
        FileSystem.get(job).delete(tmp, true);
    }
}

From source file:nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelJob.java

License:Apache License

@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);
    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));

    if (getIteration() == 1) {
        if (getNumMappers() > 0) {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR,
                    getParameters().getNumNewVertices() / getNumMappers());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers());
        } else {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024);
        }
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
    }
}

From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelJob.java

License:Apache License

@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);
    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));

    if (getIteration() == 1) {
        if (getNumMappers() > 0) {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR,
                    getParameters().getNumNewVertices() / getNumMappers());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers());
        } else {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024);
        }
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
    } else if (getIteration() == getParameters().getMaxIterations() + 1) {
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_FINAL, true);
    }
}

From source file:nthu.scopelab.tsqr.ssvd.BtJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, BtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);
    FileSystem.get(job).delete(btPath, true);

    FileOutputFormat.setOutputPath(job, btPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("BtJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }
    RunningJob rj = JobClient.runJob(job);
    System.out.println("Btjob Job ID: " + rj.getJobID().toString());
}

From source file:nthu.scopelab.tsqr.ssvd.itBtJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, itBtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);
    FileSystem.get(job).delete(btPath, true);

    FileOutputFormat.setOutputPath(job, btPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("itBtJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }
    RunningJob rj = JobClient.runJob(job);
    System.out.println("itBtJob Job ID: " + rj.getJobID().toString());
}

From source file:org.acacia.csr.java.CSRConverter.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }
    //These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);

    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }

    System.out.println("Done deleting the dir : " + dir1);
    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");

    if (!fs1.exists(notinPath)) {
        fs1.create(notinPath);
    }

    System.out.println("Done deleting the dir : " + dir2);

    //Note on Aug 23 2014: Sometimes after this the mapReduce job hangs. need to see why.

    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    //First job creates the inverted index

    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    //conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(conf, new Path("/user/miyuru/notinverts/notinverts"), TextInputFormat.class,
            InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    //Also for the moment we turn-off the speculative execution
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);
    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License

/** Indicate that a job's input data should use reflect representation.*/
public static void setInputReflect(JobConf job) {
    job.setBoolean(INPUT_IS_REFLECT, true);
}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License

/** Indicate that a job's map output data should use reflect representation.*/
public static void setMapOutputReflect(JobConf job) {
    job.setBoolean(MAP_OUTPUT_IS_REFLECT, true);
}

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output to
 * @param namedOutput       named output name; it must consist of letters
 *                          and numbers only, and cannot be the word 'part',
 *                          which is reserved for the default output
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class
 * @param schema            Schema to use for this named output
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null)
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
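
Note how the final setBoolean call records the per-output multi flag. A hedged sketch of that round trip (the prefix, suffix, and output name below are illustrative placeholders, not the actual constant values):

import org.apache.hadoop.mapred.JobConf;

public class MultiFlagSketch {
    // Assumed stand-ins for the MO_PREFIX and MULTI constants above.
    private static final String MO_PREFIX = "example.namedoutput.";
    private static final String MULTI = ".multi";

    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // What addNamedOutput does for a named output "avro1" with multi = true:
        conf.setBoolean(MO_PREFIX + "avro1" + MULTI, true);
        // The framework can later recover the flag, defaulting to false:
        boolean isMulti = conf.getBoolean(MO_PREFIX + "avro1" + MULTI, false);
        System.out.println("avro1 multi = " + isMulti);
    }
}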

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Enables or disables counters for the named outputs.
 * <p/>
 * By default these counters are disabled.
 * <p/>
 * The counters group is the {@link MultipleOutputs} class name.
 * <p/>
 * The names of the counters are the same as the named outputs. For multi
 * named outputs, the name of the counter is the concatenation of the named
 * output, an underscore '_', and the multiname.
 *
 * @param conf    job conf in which to enable or disable the counters
 * @param enabled indicates whether the counters are to be enabled
 */
public static void setCountersEnabled(JobConf conf, boolean enabled) {
    conf.setBoolean(COUNTERS_ENABLED, enabled);
}
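
From driver code, enabling these counters is a single call; a minimal sketch (assuming a JobConf that is configured and submitted afterwards):

import org.apache.avro.mapred.AvroMultipleOutputs;
import org.apache.hadoop.mapred.JobConf;

public class CountersSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Per-named-output counters are off by default; turn them on.
        AvroMultipleOutputs.setCountersEnabled(conf, true);
    }
}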