List of usage examples for the org.apache.hadoop.mapred.JobConf no-argument constructor
public JobConf()
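Before the project-specific examples below, here is a minimal sketch of what the no-argument constructor is typically for: create an empty JobConf, configure the job on it, and submit it through the old org.apache.hadoop.mapred API. This snippet is not taken from any of the source files listed here; the job name and the command-line input/output paths are illustrative, and the old-API identity mapper/reducer defaults are relied on so the example stays self-contained.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) throws Exception {
        // An empty JobConf picks up the *-site.xml resources from the classpath,
        // just like a plain Configuration, and is then filled in by hand.
        JobConf conf = new JobConf();
        conf.setJobName("jobconf-example"); // arbitrary, illustrative name

        // No mapper/reducer classes are set, so the old-API IdentityMapper and
        // IdentityReducer defaults apply; with the default TextInputFormat the
        // records are (LongWritable, Text) pairs, hence these output classes.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf); // submit and wait for completion
    }
}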
From source file: com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License: Apache License

@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Job job = new Job(new JobConf());
    FileInputFormat.addInputPath(job, inputFile);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);
    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file: com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License: Apache License

@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Job job = new Job(new JobConf());
    FileInputFormat.addInputPath(job, inputFile1);
    FileInputFormat.addInputPath(job, inputFile2);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);
    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());
    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file: com.bah.culvert.CulvertOutputFormatTest.java
License: Apache License

@Ignore(value = "Filed as bug. #375")
@Test
public void testBasicOperation() throws Throwable {
    CulvertOutputFormat format = new CulvertOutputFormat();

    // the client only works with configurations
    JobConf conf = new JobConf();
    Client.setDatabaseAdapter(conf, InMemoryDB.class);

    /*
     * most of the stuff we set in the table properties because we use the
     * jobconf earlier for input stuff
     */
    Properties tblProps = CulvertHiveUtils.confToProps(conf);
    InMemoryDB db = new InMemoryDB();
    CColumn col = new CColumn("foo".getBytes(), "bar".getBytes());
    db.create("baz", Arrays.asList(col));

    CulvertHiveUtils.setCulvertConfigurationIsEmbedded(tblProps, true);
    CulvertHiveUtils.setCulvertTable(tblProps, "baz");

    final int[] i = { 0 };
    Progressable progress = new Progressable() {
        @Override
        public void progress() {
            i[0]++;
        }
    };

    RecordWriter writer = format.getHiveRecordWriter(conf, null, Put.class, true, tblProps, progress);
    writer.write(new Put(new CKeyValue("a".getBytes(), "b".getBytes(), "c".getBytes(), "d".getBytes())));
    Assert.assertEquals(1, i[0]);

    SeekingCurrentIterator it = db.getTableAdapter("baz").get(new Get(new CRange("a".getBytes())));

    // this is failing - looks like stuff has been put but isn't coming out of
    // the get
    Assert.assertTrue("Iterator should have a next value", it.hasNext());
    Result next = it.next();
    Assert.assertTrue("Result row should be 'a' byte equivalent",
            Arrays.equals("a".getBytes(), next.getRecordId()));
    Assert.assertTrue("Result should be ",
            Arrays.equals("d".getBytes(), next.getValue("b".getBytes(), "c".getBytes()).getValue()));
}
From source file: com.benchmark.mapred.terasort.TeraGen.java
License: Apache License

public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraGen(), args);
    System.exit(res);
}
From source file: com.benchmark.mapred.terasort.TeraSort.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraSort(), args);
    System.exit(res);
}
From source file: com.benchmark.mapred.terasort.TeraValidate.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new JobConf(), new TeraValidate(), args);
    System.exit(res);
}
From source file: com.bosscs.spark.mongodb.config.MongoDeepJobConfig.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public MongoDeepJobConfig<T> initialize() {
    validate();
    super.initialize();
    configHadoop = new JobConf();
    configHadoop = new Configuration();

    StringBuilder connection = new StringBuilder();
    connection.append("mongodb").append(":").append("//");

    if (username != null && password != null) {
        connection.append(username).append(":").append(password).append("@");
    }

    boolean firstHost = true;
    for (String hostName : host) {
        if (!firstHost) {
            connection.append(",");
        }
        connection.append(hostName);
        firstHost = false;
    }

    connection.append("/").append(catalog).append(".").append(table);

    StringBuilder options = new StringBuilder();
    boolean asignado = false;

    if (readPreference != null) {
        asignado = true;
        options.append("?readPreference=").append(readPreference);
    }

    if (replicaSet != null) {
        if (asignado) {
            options.append("&");
        } else {
            options.append("?");
        }
        options.append("replicaSet=").append(replicaSet);
    }

    connection.append(options);

    configHadoop.set(MongoConfigUtil.INPUT_URI, connection.toString());
    configHadoop.set(MongoConfigUtil.OUTPUT_URI, connection.toString());
    configHadoop.set(MongoConfigUtil.INPUT_SPLIT_SIZE, String.valueOf(splitSize));

    if (inputKey != null) {
        configHadoop.set(MongoConfigUtil.INPUT_KEY, inputKey);
    }

    configHadoop.set(MongoConfigUtil.SPLITS_USE_SHARDS, String.valueOf(useShards));
    configHadoop.set(MongoConfigUtil.CREATE_INPUT_SPLITS, String.valueOf(createInputSplit));
    configHadoop.set(MongoConfigUtil.SPLITS_USE_CHUNKS, String.valueOf(splitsUseChunks));

    if (query != null) {
        configHadoop.set(MongoConfigUtil.INPUT_QUERY, query.toString());
    }

    if (fields != null) {
        configHadoop.set(MongoConfigUtil.INPUT_FIELDS, fields.toString());
    }

    if (sort != null) {
        configHadoop.set(MongoConfigUtil.INPUT_SORT, sort);
    }

    if (username != null && password != null) {
        configHadoop.set(MongoConfigUtil.INPUT_URI, connection.toString());
    }

    if (customConfiguration != null) {
        Set<Map.Entry<String, Serializable>> set = customConfiguration.entrySet();
        Iterator<Map.Entry<String, Serializable>> iterator = set.iterator();
        while (iterator.hasNext()) {
            Map.Entry<String, Serializable> entry = iterator.next();
            configHadoop.set(entry.getKey(), entry.getValue().toString());
        }
    }

    return this;
}
From source file: com.cloudera.knittingboar.records.TestTwentyNewsgroupsCustomRecordParseOLRRun.java
License: Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java
License: Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());

    configuration = new Configuration();

    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);

    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");

    /*
     * // predictor label names
     * c.set("com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y");
     * // predictor var types
     * c.set("com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric");
     * // target variables
     * c.set("com.cloudera.knittingboar.setup.TargetVariableName", "color");
     * // column header names
     * c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias");
     * //c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames",
     * //    "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n");
     */
}
From source file: com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java
License: Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());

    configuration = new Configuration();

    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);
    configuration.setInt("com.cloudera.knittingboar.setup.BatchSize", 200);

    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");

    /*
     * // predictor label names
     * c.set("com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y");
     * // predictor var types
     * c.set("com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric");
     * // target variables
     * c.set("com.cloudera.knittingboar.setup.TargetVariableName", "color");
     * // column header names
     * c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias");
     * //c.set("com.cloudera.knittingboar.setup.ColumnHeaderNames",
     * //    "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n");
     */
}