Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.JobConf constructor.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
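As a quick illustration (a minimal sketch, not taken from the examples below; the class name is made up), passing false skips loading the default resources such as core-default.xml and core-site.xml, so only explicitly set keys are present:

import org.apache.hadoop.mapred.JobConf;

public class JobConfDefaultsDemo {
    public static void main(String[] args) {
        // true: load the default resources (core-default.xml, core-site.xml, ...)
        JobConf withDefaults = new JobConf(true);

        // false: skip the defaults; only explicitly set keys are present
        JobConf withoutDefaults = new JobConf(false);

        System.out.println("with defaults:    " + withDefaults.get("io.file.buffer.size"));
        System.out.println("without defaults: " + withoutDefaults.get("io.file.buffer.size")); // null
    }
}

Note that many of the examples below use the related overloads JobConf(Configuration), JobConf(JobConf), and JobConf(Class) rather than the boolean form.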

Usage

From source file:cascading.hbase.HBaseTapCollector.java

License:Apache License

/**
 * Constructor HBaseTapCollector creates a new HBaseTapCollector instance.
 *
 * @param flowProcess of type FlowProcess<JobConf>
 * @param tap         of type Tap
 * @throws IOException when fails to initialize
 */
public HBaseTapCollector(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap)
        throws IOException {
    super(flowProcess, tap.getScheme());
    this.hadoopFlowProcess = flowProcess;
    this.tap = tap;
    this.conf = new JobConf(flowProcess.getConfigCopy());
    this.setOutput(this);
}

From source file:cascading.platform.hadoop.HadoopPlatform.java

License:Open Source License

@Override
public JobConf getConfiguration() {
    return new JobConf(configuration);
}

From source file:cascading.platform.hadoop2.Hadoop2MR1Platform.java

License:Open Source License

@Override
public JobConf getConfiguration() {
    return new JobConf(configuration);
}
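Both platform implementations hand back a fresh JobConf rather than the stored configuration itself. A minimal sketch of why that matters (class name and key are illustrative): the JobConf(Configuration) constructor copies the entries, so mutating the returned copy does not leak back into the source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class DefensiveCopyDemo {
    public static void main(String[] args) {
        Configuration base = new Configuration(false);
        base.set("example.key", "original");

        // JobConf(Configuration) copies the entries from the source,
        // so changes to the copy leave `base` untouched.
        JobConf copy = new JobConf(base);
        copy.set("example.key", "changed");

        System.out.println(base.get("example.key")); // original
        System.out.println(copy.get("example.key")); // changed
    }
}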

From source file:cascading.tap.hadoop.MultiInputFormat.java

License:Open Source License

static JobConf mergeConf(JobConf job, Map<String, String> config, boolean directly) {
    JobConf currentConf = directly ? job : new JobConf(job);

    for (String key : config.keySet()) {
        if (LOG.isDebugEnabled())
            LOG.debug("merging key: " + key + " value: " + config.get(key));

        currentConf.set(key, config.get(key));
    }

    return currentConf;
}
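Note the role of the directly flag: when it is false, the overrides are applied to a copy made with the JobConf(JobConf) constructor, so the caller's JobConf is left unmodified.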

From source file:cascading.tap.hadoop.TapCollector.java

License:Open Source License

/**
 * Constructor TapCollector creates a new TapCollector instance.
 *
 * @param tap    of type Tap
 * @param prefix of type String
 * @param conf   of type JobConf
 * @throws IOException when fails to initialize
 */
public TapCollector(Tap tap, String prefix, JobConf conf) throws IOException {
    this.tap = tap;
    this.prefix = prefix == null || prefix.length() == 0 ? null : prefix;
    this.conf = new JobConf(conf);
    this.outputEntry = new TupleEntry(tap.getSinkFields());
    this.filenamePattern = conf.get("cascading.tapcollector.partname", this.filenamePattern);

    initalize();
}

From source file:cascading.tap.hadoop.TapIterator.java

License:Open Source License

/**
 * Constructor TapIterator creates a new TapIterator instance.
 *
 * @param tap  of type Tap
 * @param conf of type JobConf
 * @throws IOException when fails to initialize
 */
public TapIterator(Tap tap, JobConf conf) throws IOException {
    this.tap = tap;
    this.conf = new JobConf(conf);

    initalize();
}

From source file:cascading.tap.MultiSinkTap.java

License:Open Source License

@Override
public void sinkInit(JobConf conf) throws IOException {
    childConfigs = new ArrayList<Map<String, String>>();

    for (int i = 0; i < getTaps().length; i++) {
        Tap tap = getTaps()[i];
        JobConf jobConf = new JobConf(conf);

        tap.sinkInit(jobConf);

        childConfigs.add(MultiInputFormat.getConfig(conf, jobConf));
    }
}

From source file:cc.slda.DisplayTopic.java

License:Apache License

@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(Settings.HELP_OPTION, false, "print the help message");
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("input beta file").create(Settings.INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(Settings.PATH_INDICATOR).hasArg()
            .withDescription("term index file").create(ParseCorpus.INDEX));
    options.addOption(OptionBuilder.withArgName(Settings.INTEGER_INDICATOR).hasArg()
            .withDescription("display top terms only (default - 10)").create(TOP_DISPLAY_OPTION));

    String betaString = null;
    String indexString = null;
    int topDisplay = TOP_DISPLAY;

    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);

        if (line.hasOption(Settings.HELP_OPTION)) {
            formatter.printHelp(ParseCorpus.class.getName(), options);
            System.exit(0);
        }

        if (line.hasOption(Settings.INPUT_OPTION)) {
            betaString = line.getOptionValue(Settings.INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");
        }

        if (line.hasOption(ParseCorpus.INDEX)) {
            indexString = line.getOptionValue(ParseCorpus.INDEX);
        } else {
            throw new ParseException("Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");
        }

        if (line.hasOption(TOP_DISPLAY_OPTION)) {
            topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
        }
    } catch (ParseException pe) {
        System.err.println(pe.getMessage());
        formatter.printHelp(ParseCorpus.class.getName(), options);
        System.exit(0);
    } catch (NumberFormatException nfe) {
        System.err.println(nfe.getMessage());
        System.exit(0);
    }

    JobConf conf = new JobConf(DisplayTopic.class);
    FileSystem fs = FileSystem.get(conf);

    Path indexPath = new Path(indexString);
    Preconditions.checkArgument(fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

    Path betaPath = new Path(betaString);
    Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

    SequenceFile.Reader sequenceFileReader = null;
    try {
        IntWritable intWritable = new IntWritable();
        Text text = new Text();
        Map<Integer, String> termIndex = new HashMap<Integer, String>();
        sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf);
        while (sequenceFileReader.next(intWritable, text)) {
            termIndex.put(intWritable.get(), text.toString());
        }

        PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
        // HMapIFW hmap = new HMapIFW();
        HMapIDW hmap = new HMapIDW();
        TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
        sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf);
        while (sequenceFileReader.next(pairOfIntFloat, hmap)) {
            treeMap.clear();

            System.out.println("==============================");
            System.out.println(
                    "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());
            System.out.println("==============================");

            Iterator<Integer> itr1 = hmap.keySet().iterator();
            int temp1 = 0;
            while (itr1.hasNext()) {
                temp1 = itr1.next();
                treeMap.put(-hmap.get(temp1), temp1);
                if (treeMap.size() > topDisplay) {
                    treeMap.remove(treeMap.lastKey());
                }
            }

            Iterator<Double> itr2 = treeMap.keySet().iterator();
            double temp2 = 0;
            while (itr2.hasNext()) {
                temp2 = itr2.next();
                if (termIndex.containsKey(treeMap.get(temp2))) {
                    System.out.println(termIndex.get(treeMap.get(temp2)) + "\t\t" + -temp2);
                } else {
                    System.out.println("How embarrassing! Term index not found...");
                }
            }
        }
    } finally {
        IOUtils.closeStream(sequenceFileReader);
    }

    return 0;
}
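Here new JobConf(DisplayTopic.class) uses the JobConf(Class) overload, which also sets the job's jar to the jar containing the given class, so the job can ship its own code to the cluster.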

From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.Fetcher.java

@Override
public int run(String[] args) throws Exception {
    JobConf jc = new JobConf(getConf());
    jc.setJarByClass(Fetcher.class);
    jc.setInputFormat(SequenceFileInputFormat.class);
    Path input = new Path(args[0], "current");
    Path output = new Path(args[1]);
    Configuration conf = CrawlerConfiguration.create();
    FileSystem fs = output.getFileSystem(conf);
    if (fs.exists(output)) {
        fs.delete(output, true); // delete(Path) without the recursive flag is deprecated
    }
    FileInputFormat.addInputPath(jc, input);
    FileOutputFormat.setOutputPath(jc, output);

    jc.setMapOutputKeyClass(Text.class);
    jc.setMapOutputValueClass(WebWritable.class);

    jc.setMapRunnerClass(Fetcher.class);
    jc.setOutputFormat(FetcherOutputFormat.class);

    JobClient.runJob(jc);
    return 0;
}

From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java

License:Apache License

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // Get splits from the input format. Expect 2 splits:
    // one from 0 to some offset, and one from that offset to Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}