List of usage examples for the org.apache.hadoop.mapred.JobConf constructor

public JobConf(boolean loadDefaults)
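The snippets below mostly use the JobConf(Configuration) and JobConf(Class) overloads. For the loadDefaults overload named above, a minimal sketch might look like the following; the class name, job name, and printed properties are chosen only for illustration and are not taken from any of the source files listed here.

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsExample {
    public static void main(String[] args) {
        // Passing false skips loading the default resources (core-default.xml,
        // core-site.xml, ...), so only explicitly set values are visible.
        JobConf conf = new JobConf(false);
        conf.setJobName("example-job"); // illustrative name only
        System.out.println("job name = " + conf.getJobName());
        // Expected to be null here because defaults were not loaded.
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
    }
}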
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testReadAllTypesNull() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
        new RecordServiceInputFormat.RecordServiceRecordReader();
    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes_null");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                for (int i = 0; i < value.getSchema().getNumColumns(); ++i) {
                    assertTrue(value.getColumnValue(i) == null);
                }
                ++numRows;
            }
        }
        assertEquals(1, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testCountStar() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    TextInputFormat.TextRecordReader reader = new TextInputFormat.TextRecordReader();
    try {
        RecordServiceConfig.setInputQuery(config, "select count(*) from tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                ++numRows;
            }
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.testapps.RecordCount.java
License: Apache License

public static long countRecords(String path) throws IOException {
    String output = TestUtil.getTempDirectory();
    Path inputPath = new Path(path);
    Path outputPath = new Path(output);

    JobConf conf = new JobConf(RecordCount.class);
    conf.setJobName("recordcount");

    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setInt("mapreduce.job.reduces", 1);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);

    // Read the result and return it. Since we set the number of reducers to 1,
    // there is always just one file containing the value.
    FileSystem fs = outputPath.getFileSystem(conf);
    FSDataInputStream resultStream = fs.open(new Path(output + "/part-00000"));
    byte[] bytes = new byte[16];
    int length = resultStream.read(bytes);
    String result = new String(bytes, 0, length).trim();
    return Long.parseLong(result);
}
From source file: com.cloudera.recordservice.tests.MiniClusterController.java
License: Apache License

/**
 * Returns a JobConf object that allows a MapReduce job to be run
 * on the minicluster.
 */
public JobConf getJobConf(Class<?> mrClass) {
    if (clusterList_.size() == 0) {
        System.err.println("Cannot run MR job because the cluster has no active nodes");
        return null;
    }
    JobConf conf = new JobConf(mrClass);
    conf.set(RecordServiceConfig.ConfVars.PLANNER_HOSTPORTS_CONF.name,
        "localhost:" + getRandomNode().plannerPort_);
    return conf;
}
From source file: com.cloudera.recordservice.tests.TestMiniClusterController.java
License: Apache License

public static JobConf createWordCountMRJobConf() {
    JobConf conf = new JobConf(WordCount.class);
    fillInWordCountMRJobConf(conf);
    return conf;
}
From source file: com.cloudera.sqoop.manager.DirectPostgreSQLExportManualTest.java
License: Apache License

public DirectPostgreSQLExportManualTest() {
    JobConf conf = new JobConf(getConf());
    DBConfiguration.configureDB(conf, "org.postgresql.Driver", getConnectString(),
        getUserName(), (String) null, (Integer) null);
    dbConf = new DBConfiguration(conf);
}
From source file: com.cloudera.sqoop.manager.PGBulkloadManagerManualTest.java
License: Apache License

public PGBulkloadManagerManualTest() {
    JobConf conf = new JobConf(getConf());
    DBConfiguration.configureDB(conf, "org.postgresql.Driver", getConnectString(),
        getUserName(), (String) null, (Integer) null);
    dbConf = new DBConfiguration(conf);
}
From source file: com.curiousby.baoyou.cn.hadoop.HDFSUtils.java
License: Open Source License

public void init() {
    try {
        System.setProperty("hadoop.home.dir", "I:\\software\\hadoop-2.6.0");
        configuration = new JobConf(HDFSUtils.class);
        configuration.setBoolean("dfs.support.append", true);
        configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        configuration.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
        fileSystem = FileSystem.get(URI.create(HDFS_URL), configuration);
    } catch (IOException e) {
        System.out.println("??");
        e.printStackTrace();
    }
}
From source file: com.dataartisans.flink.cascading.planner.FlinkFlowStep.java
License: Apache License

private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env,
        FlowNode node, int dop) {

    Tap tap = this.getSingle(node.getSourceTaps());
    JobConf tapConfig = new JobConf(this.getNodeConfig(node));
    tap.sourceConfInit(flowProcess, tapConfig);
    tapConfig.set("cascading.step.source", Tap.id(tap));

    Fields outFields = tap.getSourceFields();
    registerKryoTypes(outFields);

    JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
    MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

    DataSet<Tuple> src = env.createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
        .name(tap.getIdentifier())
        .setParallelism(dop)
        .withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

    return src;
}
From source file: com.dataartisans.flink.cascading.runtime.util.FlinkFlowProcess.java
License: Apache License

@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {
    if (trap instanceof Hfs) {
        JobConf jobConf = new JobConf(this.getConfigCopy());

        int stepNum = jobConf.getInt("cascading.flow.step.num", 0);
        int nodeNum = jobConf.getInt("cascading.flow.node.num", 0);

        String partname = String.format("-%05d-%05d-%05d", stepNum, nodeNum,
            this.getCurrentSliceNum());
        jobConf.set("cascading.tapcollector.partname", "%s%spart" + partname);

        String value = String.format("attempt_%012d_0000_m_%06d_0",
            (int) Math.rint(System.currentTimeMillis()), this.getCurrentSliceNum());
        jobConf.set("mapred.task.id", value);
        jobConf.set("mapreduce.task.id", value);

        return trap.openForWrite(new FlinkFlowProcess(jobConf), null);
    } else {
        throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
    }
}