List of usage examples for the org.apache.hadoop.mapred.JobConf no-argument constructor, JobConf()
public JobConf()
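The no-argument constructor builds a job configuration from only the default Hadoop resources found on the classpath (core-default.xml, core-site.xml, and their mapred equivalents); nothing job-specific is set until you configure it. Before the collected examples, here is a minimal sketch of the typical pattern — it uses only standard JobConf/Configuration calls, but the property name and paths are illustrative:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class JobConfSketch {
    public static void main(String[] args) {
        // Default constructor: configuration comes only from the
        // *-default.xml / *-site.xml resources visible on the classpath.
        JobConf conf = new JobConf();

        // JobConf extends Configuration, so arbitrary key/value pairs work.
        conf.set("example.marker", "hello");           // illustrative key
        System.out.println(conf.get("example.marker"));

        // Typical old-API ("mapred") job wiring.
        conf.setJobName("sketch");
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.addInputPath(conf, new Path("in"));    // illustrative paths
        FileOutputFormat.setOutputPath(conf, new Path("out"));
    }
}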
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testToJobConf() {
    try {
        Head head = new Head();
        String key = "hello konten";
        head.setKey(key);

        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        head.setFieldMap(fieldMap);

        // Round-trip the Head through a default JobConf and verify every field survives.
        JobConf conf = new JobConf();
        head.toJobConf(conf);

        Head head2 = new Head();
        head2.fromJobConf(conf);

        if (head2.magic != head.magic) {
            fail("error magic:" + head2.magic);
        }
        if (head2.compress != head.compress) {
            fail("error compress:" + head2.compress);
        }
        if (head2.compressStyle != head.compressStyle) {
            fail("error compressStyle:" + head2.compressStyle);
        }
        if (head2.encode != head.encode) {
            fail("error encode:" + head2.encode);
        }
        if (head2.encodeStyle != head.encodeStyle) {
            fail("error encodeStyle:" + head2.encodeStyle);
        }
        if (!head2.key.equals(head.key)) {
            fail("error key:" + head2.key);
        }
        if (head2.primaryIndex != head.primaryIndex) {
            fail("error primary index:" + head2.primaryIndex);
        }
        if (head2.var != head.var) {
            fail("error var:" + head2.var);
        }
        if (head2.ver != head.ver) {
            fail("error ver:" + head2.ver);
        }
        if (head2.fieldMap.fieldNum() != head.fieldMap.fieldNum()) {
            fail("error fieldNum:" + head2.fieldMap.fieldNum());
        }

        Field f1 = head.fieldMap.getField((short) 0);
        Field f2 = head2.fieldMap.getField((short) 0);
        if (f1.type() != f2.type()) {
            fail("error type:" + f2.type());
        }
        if (f1.len() != f2.len()) {
            fail("error len:" + f2.len());
        }
        if (f1.index() != f2.index()) {
            fail("error index:" + f2.index());
        }

        f1 = head.fieldMap.getField((short) 1);
        f2 = head2.fieldMap.getField((short) 1);
        if (f1.type() != f2.type()) {
            fail("error type:" + f2.type());
        }
        if (f1.len() != f2.len()) {
            fail("error len:" + f2.len());
        }
        if (f1.index() != f2.index()) {
            fail("error index:" + f2.index());
        }
    } catch (Exception e) {
        fail("get exception:" + e.getMessage());
    }
}
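The test treats a default JobConf as a plain key/value carrier: Head.toJobConf presumably writes each field under some property name and fromJobConf reads it back. The internals of Head are not shown, but the general pattern relies only on the Configuration API; the key names below are hypothetical:

import org.apache.hadoop.mapred.JobConf;

// Hypothetical round-trip of a couple of fields through a JobConf,
// mirroring what a toJobConf/fromJobConf pair typically does.
public class HeadConfSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // "Serialize": write fields as properties (key names are made up here).
        conf.setInt("head.magic", 0xCAFE);
        conf.set("head.key", "hello konten");

        // "Deserialize": read them back, with defaults for missing keys.
        int magic = conf.getInt("head.magic", -1);
        String key = conf.get("head.key", "");
        System.out.println(magic + " / " + key);
    }
}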
From source file:RunText.java
License:Apache License
@Override
public void run() {
    try {
        JobConf job = new JobConf();
        job.setInputFormat(format.getClass());
        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, Reporter.NULL);
        Text value = reader.createValue();
        LongWritable key = reader.createKey();
        int count = 0;
        long t1 = System.nanoTime();
        while (reader.next(key, value)) {
            List<String> values = parse(value);
            if (values.get(index).equals(toFind)) {
                System.out.println(value);
            }
            count++;
            // Publish progress in batches of 100 to limit contention on the shared counter.
            if (count == 100) {
                totalCount.addAndGet(100);
                count = 0;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        runningThreads.decrementAndGet();
    }
}
From source file:StringRelevance.java
License:Apache License
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    // A default JobConf is enough for TapIterator to open the tap's source.
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        String s = t.getString(keyField);
        filter.add(s.getBytes());
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:TestStringRelevance.java
License:Apache License
@Override
public void setUp() throws Exception {
    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(QUERY), true);
    fs.delete(new Path(OUTPUT), true);

    inputTap = new Hfs(new SequenceFile(new Fields("str1", "str2")), INPUT);
    TapCollector coll = new TapCollector(inputTap, new JobConf());
    coll.add(tuple1);
    coll.add(tuple2);
    coll.add(tuple3);
    coll.add(tuple4);
    coll.add(tuple5);
    coll.add(tuple6);
    coll.add(tuple7);
    coll.add(tuple8);
    coll.add(tuple9);
    coll.close();

    keyTap = new Hfs(new SequenceFile(new Fields("str")), QUERY);
    coll = new TapCollector(keyTap, new JobConf());
    coll.add(new Tuple(new Text("nathan@rapleaf.com")));
    coll.add(new Tuple(new Text("1@gmail.com")));
    coll.add(new Tuple(new Text("2@gmail.com")));
    coll.add(new Tuple(new Text("6@gmail.com")));
    coll.close();

    outputTap = new Hfs(new SequenceFile(new Fields("str1", "str2")), OUTPUT);
}
From source file:TestStringRelevance.java
License:Apache License
public static List<Tuple> getAllTuples(Tap tap) throws IOException {
    TapIterator it = new TapIterator(tap, new JobConf());
    List<Tuple> ret = new ArrayList<Tuple>();
    while (it.hasNext()) {
        // Copy the tuple: TapIterator reuses the same Tuple object on every call.
        Tuple t = new Tuple(it.next());
        ret.add(t);
    }
    return ret;
}
From source file:HadoopWordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");
    Path inputPath = new Path(HadoopWordCount.class.getClassLoader().getResource("books").getPath());
    Path outputPath = new Path(OUTPUT_PATH);

    // Set up the Hadoop job config: the input and output paths and formats.
    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(TextInputFormat.class);
    jobConfig.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(jobConfig, outputPath);
    TextInputFormat.addInputPath(jobConfig, inputPath);

    // Delete the output directory if it already exists.
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JetConfig cfg = new JetConfig();
    cfg.setInstanceConfig(new InstanceConfig()
            .setCooperativeThreadCount(Math.max(1, getRuntime().availableProcessors() / 2)));
    JetInstance jetInstance = Jet.newJetInstance(cfg);
    Jet.newJetInstance(cfg);

    try {
        System.out.print("\nCounting words from " + inputPath);
        long start = nanoTime();
        jetInstance.newJob(buildDag(jobConfig)).execute().get();
        System.out.print("Done in " + NANOSECONDS.toMillis(nanoTime() - start) + " milliseconds.");
        System.out.println("Output written to " + outputPath);
    } finally {
        Jet.shutdownAll();
    }
}
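Note that in this example the JobConf never reaches a Hadoop cluster: it only carries the input/output format and path configuration into the buildDag(...) helper (not shown), whose resulting DAG Hazelcast Jet executes in-process.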
From source file:TopFiveAverageMoviesRatedByFemales.java
public static void main(String[] args) throws Exception {
    // Job 1: join ratings with gender data and collect ratings made by females.
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);
    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        // Job 2: compute the average rating per movie from job 1's output.
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);
        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));
        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        // Job 3: join the averages with movie names and select the top five.
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);
        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));
        flag2 = job3.waitForCompletion(true);
    }
}
From source file:BytesRelevance.java
License:Apache License
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        // Look up the configured key field. (The original passed the literal
        // string "keyField" rather than the parameter, which looks like a bug.)
        byte[] b = getBytes((BytesWritable) t.get(keyField));
        filter.add(b);
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:avro.HadoopAvro.java
License:Open Source License
private JobConf createJobConfig() throws IOException {
    Path inputPath = new Path(INPUT_PATH);
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(AvroInputFormat.class);
    jobConfig.setOutputFormat(AvroOutputFormat.class);
    AvroOutputFormat.setOutputPath(jobConfig, outputPath);
    AvroInputFormat.addInputPath(jobConfig, inputPath);
    jobConfig.set(AvroJob.OUTPUT_SCHEMA, User.SCHEMA.toString());
    jobConfig.set(AvroJob.INPUT_SCHEMA, User.SCHEMA.toString());
    return jobConfig;
}
From source file:azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java
License:Apache License
private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        // A default JobConf picks up the cluster's file system settings from the classpath.
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}