List of usage examples for org.apache.hadoop.mapred JobConf setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
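Before the examples, a minimal sketch of the call itself, assuming nothing beyond the JobConf API shown above; the property name "demo.flag" is hypothetical and used only for illustration:

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store a boolean under a (hypothetical) property name.
        conf.setBoolean("demo.flag", true);
        // getBoolean returns the stored value, or the supplied default if the property is unset.
        boolean flag = conf.getBoolean("demo.flag", false);
        System.out.println(flag); // prints: true
    }
}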
From source file:net.peacesoft.nutch.crawl.ReSolrIndexer.java
License:Apache License
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit,
        boolean deleteGone, String solrParams, boolean filter, boolean normalize) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("SolrIndexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("index-solr " + solrUrl);

    LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
    LOG.info("SolrIndexer: URL filtering: " + filter);
    LOG.info("SolrIndexer: URL normalizing: " + normalize);

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    job.set(ReSolrConstants.SERVER_URL, solrUrl);
    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
    if (solrParams != null) {
        job.set(ReSolrConstants.PARAMS, solrParams);
    }

    NutchIndexWriterFactory.addClassToConf(job, ReSolrWriter.class);
    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());
    FileOutputFormat.setOutputPath(job, tmp);
    try {
        JobClient.runJob(job);
        // Do the commits once and for all the reducers in one go.
        SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
        if (!noCommit) {
            solr.commit();
        }
        long end = System.currentTimeMillis();
        LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
                + TimingUtil.elapsedTime(start, end));
    } catch (Exception e) {
        LOG.error(e.toString());
    } finally {
        FileSystem.get(job).delete(tmp, true);
    }
}
From source file:nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelJob.java
License:Apache License
@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);
    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));
    if (getIteration() == 1) {
        if (getNumMappers() > 0) {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR,
                    getParameters().getNumNewVertices() / getNumMappers());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers());
        } else {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024);
        }
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
    }
}
From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelJob.java
License:Apache License
@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);
    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));
    if (getIteration() == 1) {
        if (getNumMappers() > 0) {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR,
                    getParameters().getNumNewVertices() / getNumMappers());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers());
        } else {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024);
        }
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
    } else if (getIteration() == getParameters().getMaxIterations() + 1) {
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_FINAL, true);
    }
}
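Flags set with setBoolean on the driver side, such as IS_INIT and IS_FINAL above, are typically read back in the task with getBoolean. A hedged sketch of what that consuming side might look like; the class, the configure hook placement, and the string keys are illustrative stand-ins (not taken from the Graphalytics source, where ForestFireModelUtils defines the real keys):

import org.apache.hadoop.mapred.JobConf;

public class FlagReadingSketch {
    private boolean isInit;
    private boolean isFinal;

    // In the classic mapred API, a task receives its JobConf through
    // JobConfigurable.configure(JobConf); a mapper would read the flags there.
    public void configure(JobConf conf) {
        // The second argument is the default returned when the property was never set.
        this.isInit = conf.getBoolean("forest.fire.is.init", false);   // hypothetical key
        this.isFinal = conf.getBoolean("forest.fire.is.final", false); // hypothetical key
    }
}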
From source file:nthu.scopelab.tsqr.ssvd.BtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, BtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);

    FileSystem.get(job).delete(btPath, true);
    FileOutputFormat.setOutputPath(job, btPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("BtJob");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));
    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }

    RunningJob rj = JobClient.runJob(job);
    System.out.println("Btjob Job ID: " + rj.getJobID().toString());
}
From source file:nthu.scopelab.tsqr.ssvd.itBtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, itBtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);

    FileSystem.get(job).delete(btPath, true);
    FileOutputFormat.setOutputPath(job, btPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("itBtJob");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));
    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }

    RunningJob rj = JobClient.runJob(job);
    System.out.println("itBtJob Job ID: " + rj.getJobID().toString());
}
From source file:org.acacia.csr.java.CSRConverter.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }

    // These are the temp paths that are created on HDFS.
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    // We first delete the temporary directories if they exist on the HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);
    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }
    System.out.println("Done deleting the dir : " + dir1);

    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");
    if (!fs1.exists(notinPath)) {
        fs1.create(notinPath);
    }
    System.out.println("Done deleting the dir : " + dir2);

    // Note on Aug 23 2014: Sometimes after this the MapReduce job hangs; need to see why.
    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    // First job creates the inverted index.
    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    //conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    //FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(conf, new Path("/user/miyuru/notinverts/notinverts"), TextInputFormat.class,
            InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    // Also, for the moment, we turn off speculative execution.
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);

    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
From source file:org.apache.avro.mapred.AvroJob.java
License:Apache License
/** Indicate that a job's input data should use reflect representation. */
public static void setInputReflect(JobConf job) {
    job.setBoolean(INPUT_IS_REFLECT, true);
}
From source file:org.apache.avro.mapred.AvroJob.java
License:Apache License
/** Indicate that a job's map output data should use reflect representation. */
public static void setMapOutputReflect(JobConf job) {
    job.setBoolean(MAP_OUTPUT_IS_REFLECT, true);
}
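Both reflect helpers are one-line wrappers around setBoolean; a minimal sketch, assuming only the two methods shown above, of calling them together on a fresh job configuration:

import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.JobConf;

public class ReflectFlagsDemo {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Each call sets a boolean flag in the configuration, as shown above.
        AvroJob.setInputReflect(job);     // input data uses Avro reflect representation
        AvroJob.setMapOutputReflect(job); // map output uses Avro reflect representation
    }
}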
From source file:org.apache.avro.mapred.AvroMultipleOutputs.java
License:Apache License
/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name; it has to be a word, letters
 *                          and numbers only, and cannot be the word 'part', as
 *                          that is reserved for the default output.
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class.
 * @param schema            Schema to be used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null)
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
From source file:org.apache.avro.mapred.AvroMultipleOutputs.java
License:Apache License
/**
 * Enables or disables counters for the named outputs.
 * <p/>
 * By default these counters are disabled.
 * <p/>
 * MultipleOutputs supports counters; by default they are disabled.
 * The counters group is the {@link MultipleOutputs} class name.
 * </p>
 * The names of the counters are the same as the named outputs. For multi
 * named outputs the name of the counter is the concatenation of the named
 * output, an underscore '_', and the multiname.
 *
 * @param conf    job conf in which to enable or disable the counters.
 * @param enabled indicates if the counters will be enabled or not.
 */
public static void setCountersEnabled(JobConf conf, boolean enabled) {
    conf.setBoolean(COUNTERS_ENABLED, enabled);
}
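A minimal sketch of turning these counters on from driver code, assuming only the method shown above:

import org.apache.avro.mapred.AvroMultipleOutputs;
import org.apache.hadoop.mapred.JobConf;

public class CountersEnabledDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Enables the per-named-output counters, which are off by default.
        AvroMultipleOutputs.setCountersEnabled(conf, true);
    }
}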