List of usage examples for the constructor org.apache.hadoop.mapred.JobConf#JobConf
public JobConf(boolean loadDefaults)
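None of the snippets below actually pass a boolean, so here is a minimal illustrative sketch (not taken from any of the listed projects) of what the loadDefaults flag controls; the property key is only an example:

import org.apache.hadoop.mapred.JobConf;

public class JobConfDefaultsSketch {
    public static void main(String[] args) {
        // loadDefaults = true: default resources such as core-default.xml
        // and core-site.xml are read into the configuration.
        JobConf withDefaults = new JobConf(true);

        // loadDefaults = false: start from an empty configuration; only
        // properties set explicitly (or via addResource) are visible.
        JobConf bare = new JobConf(false);
        bare.set("mapreduce.framework.name", "local"); // illustrative key
        System.out.println(bare.get("mapreduce.framework.name"));
    }
}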
From source file:cascading.hbase.HBaseTapCollector.java
License:Apache License
/**
 * Constructor TapCollector creates a new TapCollector instance.
 *
 * @param flowProcess
 * @param tap of type Tap
 * @throws IOException when fails to initialize
 */
public HBaseTapCollector(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap)
        throws IOException {
    super(flowProcess, tap.getScheme());
    this.hadoopFlowProcess = flowProcess;
    this.tap = tap;
    this.conf = new JobConf(flowProcess.getConfigCopy());
    this.setOutput(this);
}
From source file:cascading.platform.hadoop.HadoopPlatform.java
License:Open Source License
@Override
public JobConf getConfiguration() {
    return new JobConf(configuration);
}
From source file:cascading.platform.hadoop2.Hadoop2MR1Platform.java
License:Open Source License
public JobConf getConfiguration() {
    return new JobConf(configuration);
}
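A pattern worth noting across these examples: wrapping an existing Configuration in new JobConf(conf) produces an independent copy, so per-job mutations do not leak back into the shared configuration. A minimal sketch of that copy semantics (the property name is illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class CopySemanticsSketch {
    public static void main(String[] args) {
        Configuration shared = new Configuration();
        shared.set("example.key", "original"); // illustrative key

        JobConf copy = new JobConf(shared); // copy constructor
        copy.set("example.key", "overridden");

        // Edits to the copy do not affect the source configuration.
        System.out.println(shared.get("example.key")); // prints: original
        System.out.println(copy.get("example.key"));   // prints: overridden
    }
}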
From source file:cascading.tap.hadoop.MultiInputFormat.java
License:Open Source License
static JobConf mergeConf(JobConf job, Map<String, String> config, boolean directly) {
    JobConf currentConf = directly ? job : new JobConf(job);

    for (String key : config.keySet()) {
        if (LOG.isDebugEnabled())
            LOG.debug("merging key: " + key + " value: " + config.get(key));

        currentConf.set(key, config.get(key));
    }

    return currentConf;
}
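The directly flag decides whether the merge mutates the passed-in job or a fresh copy. Since mergeConf is internal to Cascading, here is a hedged standalone sketch of the same copy-then-merge idea (the helper and key names are mine, not Cascading's):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapred.JobConf;

public class MergeSketch {
    /** Copies every entry of config into a fresh copy of job, leaving job untouched. */
    static JobConf mergeIntoCopy(JobConf job, Map<String, String> config) {
        JobConf copy = new JobConf(job); // hypothetical equivalent of the directly == false branch
        for (Map.Entry<String, String> entry : config.entrySet()) {
            copy.set(entry.getKey(), entry.getValue());
        }
        return copy;
    }

    public static void main(String[] args) {
        Map<String, String> overrides = new HashMap<>();
        overrides.put("example.key", "value"); // illustrative key
        JobConf merged = mergeIntoCopy(new JobConf(), overrides);
        System.out.println(merged.get("example.key"));
    }
}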
From source file:cascading.tap.hadoop.TapCollector.java
License:Open Source License
/**
 * Constructor TapCollector creates a new TapCollector instance.
 *
 * @param tap    of type Tap
 * @param prefix of type String
 * @param conf   of type JobConf
 * @throws IOException when fails to initialize
 */
public TapCollector(Tap tap, String prefix, JobConf conf) throws IOException {
    this.tap = tap;
    this.prefix = prefix == null || prefix.length() == 0 ? null : prefix;
    this.conf = new JobConf(conf);
    this.outputEntry = new TupleEntry(tap.getSinkFields());
    this.filenamePattern = conf.get("cascading.tapcollector.partname", this.filenamePattern);
    initalize();
}
From source file:cascading.tap.hadoop.TapIterator.java
License:Open Source License
/**
 * Constructor TapIterator creates a new TapIterator instance.
 *
 * @param conf of type JobConf
 * @throws IOException when fails to initialize
 */
public TapIterator(Tap tap, JobConf conf) throws IOException {
    this.tap = tap;
    this.conf = new JobConf(conf);
    initalize();
}
From source file:cascading.tap.MultiSinkTap.java
License:Open Source License
@Override
public void sinkInit(JobConf conf) throws IOException {
    childConfigs = new ArrayList<Map<String, String>>();

    for (int i = 0; i < getTaps().length; i++) {
        Tap tap = getTaps()[i];
        JobConf jobConf = new JobConf(conf);

        tap.sinkInit(jobConf);

        childConfigs.add(MultiInputFormat.getConfig(conf, jobConf));
    }
}
From source file:cc.slda.DisplayTopic.java
License:Apache License
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(Settings.HELP_OPTION, false, "print the help message");
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("input beta file").create(Settings.INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("term index file").create(ParseCorpus.INDEX));
    options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg()
            .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION));

    String betaString = null;
    String indexString = null;
    int topDisplay = TOP_DISPLAY;

    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(Settings.HELP_OPTION)) {
            formatter.printHelp(ParseCorpus.class.getName(), options);
            System.exit(0);
        }

        if (line.hasOption(Settings.INPUT_OPTION)) {
            betaString = line.getOptionValue(Settings.INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");
        }

        if (line.hasOption(ParseCorpus.INDEX)) {
            indexString = line.getOptionValue(ParseCorpus.INDEX);
        } else {
            throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");
        }

        if (line.hasOption(TOP_DISPLAY_OPTION)) {
            topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
        }
    } catch (ParseException pe) {
        System.err.println(pe.getMessage());
        formatter.printHelp(ParseCorpus.class.getName(), options);
        System.exit(0);
    } catch (NumberFormatException nfe) {
        System.err.println(nfe.getMessage());
        System.exit(0);
    }

    JobConf conf = new JobConf(DisplayTopic.class);
    FileSystem fs = FileSystem.get(conf);

    Path indexPath = new Path(indexString);
    Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

    Path betaPath = new Path(betaString);
    Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

    SequenceFile.Reader sequenceFileReader = null;
    try {
        IntWritable intWritable = new IntWritable();
        Text text = new Text();
        Map<Integer, String> termIndex = new HashMap<Integer, String>();
        sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf);
        while (sequenceFileReader.next(intWritable, text)) {
            termIndex.put(intWritable.get(), text.toString());
        }

        PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
        // HMapIFW hmap = new HMapIFW();
        HMapIDW hmap = new HMapIDW();
        TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
        sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf);
        while (sequenceFileReader.next(pairOfIntFloat, hmap)) {
            treeMap.clear();

            System.out.println("==============================");
            System.out.println(
                    "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());
            System.out.println("==============================");

            Iterator<Integer> itr1 = hmap.keySet().iterator();
            int temp1 = 0;
            while (itr1.hasNext()) {
                temp1 = itr1.next();
                treeMap.put(-hmap.get(temp1), temp1);
                if (treeMap.size() > topDisplay) {
                    treeMap.remove(treeMap.lastKey());
                }
            }

            Iterator<Double> itr2 = treeMap.keySet().iterator();
            double temp2 = 0;
            while (itr2.hasNext()) {
                temp2 = itr2.next();
                if (termIndex.containsKey(treeMap.get(temp2))) {
                    System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2);
                } else {
                    System.out.println("How embarrassing! Term index not found...");
                }
            }
        }
    } finally {
        IOUtils.closeStream(sequenceFileReader);
    }

    return 0;
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java
@Override
public int run(String[] args) throws Exception {
    JobConf jc = new JobConf(getConf());
    jc.setJarByClass(Fetcher.class);
    jc.setInputFormat(SequenceFileInputFormat.class);

    Path input = new Path(args[0], "current");
    Path output = new Path(args[1]);

    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = output.getFileSystem(conf);
    if (fs.exists(output)) {
        fs.delete(output, true); // recursive delete; replaces the deprecated single-argument delete(Path)
    }

    FileInputFormat.addInputPath(jc, input);
    FileOutputFormat.setOutputPath(jc, output);
    jc.setMapOutputKeyClass(Text.class);
    jc.setMapOutputValueClass(WebWritable.class);
    jc.setMapRunnerClass(Fetcher.class);
    jc.setOutputFormat(FetcherOutputFormat.class);

    JobClient.runJob(jc);
    return 0;
}
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();

    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();

    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}