List of usage examples for the constructor org.apache.hadoop.mapred.JobConf#JobConf
public JobConf(boolean loadDefaults)
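None of the snippets below actually pass a boolean, so here is a minimal illustrative sketch (not taken from any of the listed projects) of what the loadDefaults flag controls; the property key is only an example:

import org.apache.hadoop.mapred.JobConf;

public class JobConfDefaultsSketch {
    public static void main(String[] args) {
        // loadDefaults = true: default resources such as core-default.xml
        // and core-site.xml are read into the configuration.
        JobConf withDefaults = new JobConf(true);

        // loadDefaults = false: start from an empty configuration; only
        // properties set explicitly (or via addResource) are visible.
        JobConf bare = new JobConf(false);
        bare.set("mapreduce.framework.name", "local"); // illustrative key
        System.out.println(bare.get("mapreduce.framework.name"));
    }
}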
From source file:cascading.hbase.HBaseTapCollector.java
License:Apache License
/**
 * Constructor TapCollector creates a new TapCollector instance.
 *
 * @param flowProcess
 * @param tap of type Tap
 * @throws IOException when fails to initialize
 */
public HBaseTapCollector(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap)
        throws IOException {
    super(flowProcess, tap.getScheme());
    this.hadoopFlowProcess = flowProcess;
    this.tap = tap;
    this.conf = new JobConf(flowProcess.getConfigCopy());
    this.setOutput(this);
}
From source file:cascading.platform.hadoop.HadoopPlatform.java
License:Open Source License
@Override
public JobConf getConfiguration() {
    return new JobConf(configuration);
}
From source file:cascading.platform.hadoop2.Hadoop2MR1Platform.java
License:Open Source License
public JobConf getConfiguration() {
    return new JobConf(configuration);
}
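A pattern worth noting across these examples: wrapping an existing Configuration in new JobConf(conf) produces an independent copy, so per-job mutations do not leak back into the shared configuration. A minimal sketch of that copy semantics (the property name is illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class CopySemanticsSketch {
    public static void main(String[] args) {
        Configuration shared = new Configuration();
        shared.set("example.key", "original"); // illustrative key

        JobConf copy = new JobConf(shared); // copy constructor
        copy.set("example.key", "overridden");

        // Edits to the copy do not affect the source configuration.
        System.out.println(shared.get("example.key")); // prints: original
        System.out.println(copy.get("example.key"));   // prints: overridden
    }
}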
From source file:cascading.tap.hadoop.MultiInputFormat.java
License:Open Source License
static JobConf mergeConf(JobConf job, Map<String, String> config, boolean directly) {
    JobConf currentConf = directly ? job : new JobConf(job);

    for (String key : config.keySet()) {
        if (LOG.isDebugEnabled())
            LOG.debug("merging key: " + key + " value: " + config.get(key));

        currentConf.set(key, config.get(key));
    }

    return currentConf;
}
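The directly flag decides whether the merge mutates the passed-in job or a fresh copy. Since mergeConf is internal to Cascading, here is a hedged standalone sketch of the same copy-then-merge idea (the helper and key names are mine, not Cascading's):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapred.JobConf;

public class MergeSketch {
    /** Copies every entry of config into a fresh copy of job, leaving job untouched. */
    static JobConf mergeIntoCopy(JobConf job, Map<String, String> config) {
        JobConf copy = new JobConf(job); // hypothetical equivalent of the directly == false branch
        for (Map.Entry<String, String> entry : config.entrySet()) {
            copy.set(entry.getKey(), entry.getValue());
        }
        return copy;
    }

    public static void main(String[] args) {
        Map<String, String> overrides = new HashMap<>();
        overrides.put("example.key", "value"); // illustrative key
        JobConf merged = mergeIntoCopy(new JobConf(), overrides);
        System.out.println(merged.get("example.key"));
    }
}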
From source file:cascading.tap.hadoop.TapCollector.java
License:Open Source License
/**
 * Constructor TapCollector creates a new TapCollector instance.
 *
 * @param tap    of type Tap
 * @param prefix of type String
 * @param conf   of type JobConf
 * @throws IOException when fails to initialize
 */
public TapCollector(Tap tap, String prefix, JobConf conf) throws IOException {
    this.tap = tap;
    this.prefix = prefix == null || prefix.length() == 0 ? null : prefix;
    this.conf = new JobConf(conf);
    this.outputEntry = new TupleEntry(tap.getSinkFields());
    this.filenamePattern = conf.get("cascading.tapcollector.partname", this.filenamePattern);
    initalize();
}
From source file:cascading.tap.hadoop.TapIterator.java
License:Open Source License
/**
 * Constructor TapIterator creates a new TapIterator instance.
 *
 * @param conf of type JobConf
 * @throws IOException when fails to initialize
 */
public TapIterator(Tap tap, JobConf conf) throws IOException {
    this.tap = tap;
    this.conf = new JobConf(conf);
    initalize();
}
From source file:cascading.tap.MultiSinkTap.java
License:Open Source License
@Override
public void sinkInit(JobConf conf) throws IOException {
    childConfigs = new ArrayList<Map<String, String>>();

    for (int i = 0; i < getTaps().length; i++) {
        Tap tap = getTaps()[i];
        JobConf jobConf = new JobConf(conf);

        tap.sinkInit(jobConf);

        childConfigs.add(MultiInputFormat.getConfig(conf, jobConf));
    }
}
From source file:cc.slda.DisplayTopic.java
License:Apache License
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(Settings.HELP_OPTION, false, "print the help message");
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("input beta file").create(Settings.INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("term index file").create(ParseCorpus.INDEX));
    options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg()
            .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION));

    String betaString = null;
    String indexString = null;
    int topDisplay = TOP_DISPLAY;

    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(Settings.HELP_OPTION)) {
            formatter.printHelp(ParseCorpus.class.getName(), options);
            System.exit(0);
        }

        if (line.hasOption(Settings.INPUT_OPTION)) {
            betaString = line.getOptionValue(Settings.INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");
        }

        if (line.hasOption(ParseCorpus.INDEX)) {
            indexString = line.getOptionValue(ParseCorpus.INDEX);
        } else {
            throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");
        }

        if (line.hasOption(TOP_DISPLAY_OPTION)) {
            topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
        }
    } catch (ParseException pe) {
        System.err.println(pe.getMessage());
        formatter.printHelp(ParseCorpus.class.getName(), options);
        System.exit(0);
    } catch (NumberFormatException nfe) {
        System.err.println(nfe.getMessage());
        System.exit(0);
    }

    JobConf conf = new JobConf(DisplayTopic.class);
    FileSystem fs = FileSystem.get(conf);

    Path indexPath = new Path(indexString);
    Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

    Path betaPath = new Path(betaString);
    Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

    SequenceFile.Reader sequenceFileReader = null;
    try {
        IntWritable intWritable = new IntWritable();
        Text text = new Text();
        Map<Integer, String> termIndex = new HashMap<Integer, String>();
        sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf);
        while (sequenceFileReader.next(intWritable, text)) {
            termIndex.put(intWritable.get(), text.toString());
        }

        PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
        // HMapIFW hmap = new HMapIFW();
        HMapIDW hmap = new HMapIDW();
        TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
        sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf);
        while (sequenceFileReader.next(pairOfIntFloat, hmap)) {
            treeMap.clear();

            System.out.println("==============================");
            System.out.println(
                    "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());
            System.out.println("==============================");

            Iterator<Integer> itr1 = hmap.keySet().iterator();
            int temp1 = 0;
            while (itr1.hasNext()) {
                temp1 = itr1.next();
                treeMap.put(-hmap.get(temp1), temp1);
                if (treeMap.size() > topDisplay) {
                    treeMap.remove(treeMap.lastKey());
                }
            }

            Iterator<Double> itr2 = treeMap.keySet().iterator();
            double temp2 = 0;
            while (itr2.hasNext()) {
                temp2 = itr2.next();
                if (termIndex.containsKey(treeMap.get(temp2))) {
                    System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2);
                } else {
                    System.out.println("How embarrassing! Term index not found...");
                }
            }
        }
    } finally {
        IOUtils.closeStream(sequenceFileReader);
    }

    return 0;
}
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java
@Override
public int run(String[] args) throws Exception {
    JobConf jc = new JobConf(getConf());
    jc.setJarByClass(Fetcher.class);
    jc.setInputFormat(SequenceFileInputFormat.class);

    Path input = new Path(args[0], "current");
    Path output = new Path(args[1]);

    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = output.getFileSystem(conf);
    if (fs.exists(output)) {
        fs.delete(output, true); // recursive delete; replaces the deprecated single-argument delete(Path)
    }

    FileInputFormat.addInputPath(jc, input);
    FileOutputFormat.setOutputPath(jc, output);
    jc.setMapOutputKeyClass(Text.class);
    jc.setMapOutputValueClass(WebWritable.class);
    jc.setMapRunnerClass(Fetcher.class);
    jc.setOutputFormat(FetcherOutputFormat.class);

    JobClient.runJob(jc);
    return 0;
}
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();

    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();

    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}