List of usage examples for org.apache.hadoop.mapred JobConf setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
From source file: org.archive.nutchwax.PageRankDbMerger.java
License:Apache License
public static JobConf createMergeJob(Configuration config, Path pageRankDb, boolean normalize, boolean filter) { Path newPageRankDb = new Path( "pagerankdb-merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); JobConf job = new NutchJob(config); job.setJobName("pagerankdb merge " + pageRankDb); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(PageRankDbFilter.class); job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize); job.setBoolean(LinkDbFilter.URL_FILTERING, filter); job.setReducerClass(PageRankDbMerger.class); FileOutputFormat.setOutputPath(job, newPageRankDb); job.setOutputFormat(MapFileOutputFormat.class); job.setBoolean("mapred.output.compress", true); job.setOutputKeyClass(Text.class); // DIFF: Use IntWritable instead of Inlinks as the output value type. job.setOutputValueClass(IntWritable.class); return job;//from ww w . jav a2s .c o m }
From source file:org.cloudata.util.matrix.AbstractMatrix.java
License:Apache License
public void mutiply(AbstractMatrix targetMatrix, AbstractMatrix resultMatrix) throws IOException { Path tempOutputPath = new Path("temp/Matrix_" + System.currentTimeMillis()); JobConf jobConf = new JobConf(AbstractMatrix.class); jobConf.setJobName("Matrix_Mutiply_Job" + "(" + new Date() + ")"); //<MAP> jobConf.setMapperClass(MatrixMutiplyMap.class); jobConf.setInputFormat(MatrixInputFormat.class); jobConf.set(MatrixInputFormat.MATRIX_INPUT_TABLE, ctable.getTableName()); jobConf.set(MatrixInputFormat.MATRIX_INPUT_COLUMN, columnName); jobConf.set(MatrixInputFormat.MATRIX_TARGET_TABLE, targetMatrix.ctable.getTableName()); jobConf.set(MatrixInputFormat.MATRIX_TARGET_COLUMN, targetMatrix.columnName); jobConf.setBoolean(MatrixInputFormat.MATRIX_TARGET_SPARSE, targetMatrix.isSparse()); jobConf.setMapOutputKeyClass(MatrixItem.class); jobConf.setMapOutputValueClass(Text.class); //</MAP> //<REDUCE> jobConf.setPartitionerClass(KeyRangePartitioner.class); jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, resultMatrix.ctable.getTableName()); jobConf.setReducerClass(MatrixMutiplyReduce.class); jobConf.set(MatrixInputFormat.MATRIX_RESULT_TABLE, resultMatrix.ctable.getTableName()); jobConf.set(MatrixInputFormat.MATRIX_RESULT_COLUMN, resultMatrix.columnName); jobConf.setBoolean(MatrixInputFormat.MATRIX_RESULT_SPARSE, resultMatrix.isSparse()); jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(Text.class); TabletInfo[] tabletInfos = resultMatrix.ctable.listTabletInfos(); jobConf.setNumReduceTasks(tabletInfos.length); jobConf.setMaxReduceAttempts(0);//from ww w.j a va2s .c o m FileOutputFormat.setOutputPath(jobConf, tempOutputPath); //</REDUCE> //Run Job JobClient.runJob(jobConf); //delete temp output path FileSystem fs = FileSystem.get(jobConf); fs.delete(tempOutputPath, true); }
From source file:org.cloudata.util.upload.UploadUtil.java
License:Apache License
private void doHadoopUpload(CloudataConf conf) throws IOException { if (!CTable.existsTable(conf, tableName)) { throw new IOException("No table:" + tableName); }// ww w. ja v a 2 s . com JobConf jobConf = new JobConf(UploadUtil.class); String libDir = CloudataMapReduceUtil.initMapReduce(jobConf); jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")"); //KeyRangePartitioner //AbstractTabletInputFormat.OUTPUT_TABLE? ? jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName); //<Map> FileInputFormat.addInputPath(jobConf, new Path(inputPath)); jobConf.setInputFormat(TextInputFormat.class); jobConf.set("uploadJob.delim", delim); String columnStr = ""; for (String eachColumn : columns) { columnStr += eachColumn + ","; } jobConf.set("uploadJob.columns", columnStr); String fieldNumStr = ""; for (int eachField : fieldNums) { fieldNumStr += eachField + ","; } jobConf.set("uploadJob.fieldNums", fieldNumStr); jobConf.setBoolean("uploadJob.keyValuePair", keyValuePair); jobConf.setMapperClass(UploadMap.class); jobConf.setMapOutputKeyClass(Text.class); jobConf.setMapOutputValueClass(Text.class); jobConf.setMapSpeculativeExecution(false); jobConf.setMaxMapAttempts(0); //</Map> //<Reduce> Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer"); FileOutputFormat.setOutputPath(jobConf, tempOutputPath); jobConf.setNumReduceTasks(0); //</Reduce> try { JobClient.runJob(jobConf); } finally { FileSystem fs = FileSystem.get(jobConf); FileUtil.delete(fs, tempOutputPath, true); CloudataMapReduceUtil.clearMapReduce(libDir); } }
From source file:org.elasticsearch.hadoop.integration.HdpBootstrap.java
License:Apache License
public static JobConf hadoopConfig() { if (!hackVerified) { hackVerified = true;/* w w w .j a va 2 s .c o m*/ // check local execution if ("local".equals(TestSettings.TESTING_PROPS.get("mapred.job.tracker"))) { hackHadoopStagingOnWin(); } // damn HADOOP-9123 System.setProperty("path.separator", ":"); } JobConf conf = addProperties(new JobConf(), TestSettings.TESTING_PROPS); conf.setBoolean("mapred.used.genericoptionsparser", true); // provision if not local if (!isJtLocal(conf)) { Provisioner.provision(conf); HdfsUtils.rmr(conf, ".staging"); } return conf; }
From source file:org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSearchTest.java
License:Apache License
@Test public void testSearchNonExistingIndex() throws Exception { JobConf conf = createJobConf(); conf.setBoolean(ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY, true); conf.set(ConfigurationOptions.ES_RESOURCE, "foobar/save"); JobClient.runJob(conf);// w w w . j a va2s. com }
From source file:org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSearchTest.java
License:Apache License
/**
 * Runs a job against {@code indexPrefix + "mroldapi/updatewoupsert"} with
 * {@code ES_INDEX_READ_MISSING_AS_EMPTY} set to {@code false} and expects an
 * {@code EsHadoopIllegalArgumentException}.
 */
@Test(expected = EsHadoopIllegalArgumentException.class)
public void testSearchUpdatedWithoutUpsertMeaningNonExistingIndex() throws Exception {
    JobConf job = createJobConf();

    job.setBoolean(ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY, false);
    String resource = indexPrefix + "mroldapi/updatewoupsert";
    job.set(ConfigurationOptions.ES_RESOURCE, resource);

    JobClient.runJob(job);
}
From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSaveTest.java
License:Apache License
/**
 * Runs a job that reads {@code src/test/resources/artists.dat} as text, maps it with
 * {@code JsonMapper}, and writes through {@code ESOutputFormat} to the resource
 * {@code mroldapi/save}. The test passes when the job runs without throwing.
 */
@Test
public void testBasicSave() throws Exception {
    JobConf job = HdpBootstrap.hadoopConfig();

    // Input/output formats and the map/reduce pipeline.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(ESOutputFormat.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setMapperClass(JsonMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setBoolean("mapred.used.genericoptionsparser", true);

    // Source data and target resource.
    Path artists = new Path("src/test/resources/artists.dat");
    FileInputFormat.setInputPaths(job, artists);
    job.set("es.resource", "mroldapi/save");

    JobClient.runJob(job);
}
From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSaveTest.java
License:Apache License
/**
 * Runs the same save pipeline as the basic save test against the resource
 * {@code mroldapi/non-existing} with {@code ES_INDEX_AUTO_CREATE} set to {@code "no"},
 * and expects an {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testIndexAutoCreateDisabled() throws Exception {
    JobConf job = HdpBootstrap.hadoopConfig();

    // Input/output formats and the map/reduce pipeline.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(ESOutputFormat.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setMapperClass(JsonMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setBoolean("mapred.used.genericoptionsparser", true);

    // Source data, target resource, and index auto-creation switched off.
    Path artists = new Path("src/test/resources/artists.dat");
    FileInputFormat.setInputPaths(job, artists);
    job.set(ConfigurationOptions.ES_RESOURCE, "mroldapi/non-existing");
    job.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, "no");

    JobClient.runJob(job);
}
From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSearchTest.java
License:Apache License
@Test public void testBasicSearch() throws Exception { JobConf conf = HdpBootstrap.hadoopConfig(); conf.setInputFormat(ESInputFormat.class); conf.setOutputFormat(PrintStreamOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(MapWritable.class); conf.setBoolean("mapred.used.genericoptionsparser", true); conf.set("es.resource", "mroldapi/save/_search?q=*"); // un-comment to print results to the console (works only in local mode) //PrintStreamOutputFormat.stream(conf, Stream.OUT); JobClient.runJob(conf);/*from w w w .j a va2 s.c o m*/ }
From source file:org.elasticsearch.hadoop.integration.mr.MROldApiSearchTest.java
License:Apache License
@Test public void testSearchNonExistingIndex() throws Exception { JobConf conf = HdpBootstrap.hadoopConfig(); conf.setInputFormat(ESInputFormat.class); conf.setOutputFormat(PrintStreamOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(MapWritable.class); conf.setBoolean("mapred.used.genericoptionsparser", true); conf.setBoolean(ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY, true); conf.set("es.resource", "foobar/save/_search?q=*"); // un-comment to print results to the console (works only in local mode) //PrintStreamOutputFormat.stream(conf, Stream.OUT); JobClient.runJob(conf);//from w w w. j a v a2s . com }