List of usage examples for the org.apache.hadoop.mapred.JobConf no-argument constructor, JobConf()
public JobConf()
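The no-argument constructor builds a job configuration from only the default Hadoop resources found on the classpath (core-default.xml, core-site.xml, and their mapred equivalents); nothing job-specific is set until you configure it. Before the collected examples, here is a minimal sketch of the typical pattern — it uses only standard JobConf/Configuration calls, but the property name and paths are illustrative:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class JobConfSketch {
    public static void main(String[] args) {
        // Default constructor: configuration comes only from the
        // *-default.xml / *-site.xml resources visible on the classpath.
        JobConf conf = new JobConf();

        // JobConf extends Configuration, so arbitrary key/value pairs work.
        conf.set("example.marker", "hello");           // illustrative key
        System.out.println(conf.get("example.marker"));

        // Typical old-API ("mapred") job wiring.
        conf.setJobName("sketch");
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.addInputPath(conf, new Path("in"));    // illustrative paths
        FileOutputFormat.setOutputPath(conf, new Path("out"));
    }
}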
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testToJobConf() {
    try {
        Head head = new Head();
        String key = "hello konten";
        head.setKey(key);

        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        head.setFieldMap(fieldMap);

        // Round-trip the Head through a default JobConf and verify every field survives.
        JobConf conf = new JobConf();
        head.toJobConf(conf);

        Head head2 = new Head();
        head2.fromJobConf(conf);

        if (head2.magic != head.magic) {
            fail("error magic:" + head2.magic);
        }
        if (head2.compress != head.compress) {
            fail("error compress:" + head2.compress);
        }
        if (head2.compressStyle != head.compressStyle) {
            fail("error compressStyle:" + head2.compressStyle);
        }
        if (head2.encode != head.encode) {
            fail("error encode:" + head2.encode);
        }
        if (head2.encodeStyle != head.encodeStyle) {
            fail("error encodeStyle:" + head2.encodeStyle);
        }
        if (!head2.key.equals(head.key)) {
            fail("error key:" + head2.key);
        }
        if (head2.primaryIndex != head.primaryIndex) {
            fail("error primary index:" + head2.primaryIndex);
        }
        if (head2.var != head.var) {
            fail("error var:" + head2.var);
        }
        if (head2.ver != head.ver) {
            fail("error ver:" + head2.ver);
        }
        if (head2.fieldMap.fieldNum() != head.fieldMap.fieldNum()) {
            fail("error fieldNum:" + head2.fieldMap.fieldNum());
        }

        Field f1 = head.fieldMap.getField((short) 0);
        Field f2 = head2.fieldMap.getField((short) 0);
        if (f1.type() != f2.type()) {
            fail("error type:" + f2.type());
        }
        if (f1.len() != f2.len()) {
            fail("error len:" + f2.len());
        }
        if (f1.index() != f2.index()) {
            fail("error index:" + f2.index());
        }

        f1 = head.fieldMap.getField((short) 1);
        f2 = head2.fieldMap.getField((short) 1);
        if (f1.type() != f2.type()) {
            fail("error type:" + f2.type());
        }
        if (f1.len() != f2.len()) {
            fail("error len:" + f2.len());
        }
        if (f1.index() != f2.index()) {
            fail("error index:" + f2.index());
        }
    } catch (Exception e) {
        fail("get exception:" + e.getMessage());
    }
}
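The test treats a default JobConf as a plain key/value carrier: Head.toJobConf presumably writes each field under some property name and fromJobConf reads it back. The internals of Head are not shown, but the general pattern relies only on the Configuration API; the key names below are hypothetical:

import org.apache.hadoop.mapred.JobConf;

// Hypothetical round-trip of a couple of fields through a JobConf,
// mirroring what a toJobConf/fromJobConf pair typically does.
public class HeadConfSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // "Serialize": write fields as properties (key names are made up here).
        conf.setInt("head.magic", 0xCAFE);
        conf.set("head.key", "hello konten");

        // "Deserialize": read them back, with defaults for missing keys.
        int magic = conf.getInt("head.magic", -1);
        String key = conf.get("head.key", "");
        System.out.println(magic + " / " + key);
    }
}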
From source file:RunText.java
License:Apache License
@Override
public void run() {
    try {
        JobConf job = new JobConf();
        job.setInputFormat(format.getClass());
        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, Reporter.NULL);
        Text value = reader.createValue();
        LongWritable key = reader.createKey();
        int count = 0;
        long t1 = System.nanoTime();
        while (reader.next(key, value)) {
            List<String> values = parse(value);
            if (values.get(index).equals(toFind)) {
                System.out.println(value);
            }
            count++;
            // Publish progress in batches of 100 to limit contention on the shared counter.
            if (count == 100) {
                totalCount.addAndGet(100);
                count = 0;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        runningThreads.decrementAndGet();
    }
}
From source file:StringRelevance.java
License:Apache License
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    // A default JobConf is enough for TapIterator to open the tap's source.
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        String s = t.getString(keyField);
        filter.add(s.getBytes());
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:TestStringRelevance.java
License:Apache License
@Override
public void setUp() throws Exception {
    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(QUERY), true);
    fs.delete(new Path(OUTPUT), true);

    inputTap = new Hfs(new SequenceFile(new Fields("str1", "str2")), INPUT);
    TapCollector coll = new TapCollector(inputTap, new JobConf());
    coll.add(tuple1);
    coll.add(tuple2);
    coll.add(tuple3);
    coll.add(tuple4);
    coll.add(tuple5);
    coll.add(tuple6);
    coll.add(tuple7);
    coll.add(tuple8);
    coll.add(tuple9);
    coll.close();

    keyTap = new Hfs(new SequenceFile(new Fields("str")), QUERY);
    coll = new TapCollector(keyTap, new JobConf());
    coll.add(new Tuple(new Text("nathan@rapleaf.com")));
    coll.add(new Tuple(new Text("1@gmail.com")));
    coll.add(new Tuple(new Text("2@gmail.com")));
    coll.add(new Tuple(new Text("6@gmail.com")));
    coll.close();

    outputTap = new Hfs(new SequenceFile(new Fields("str1", "str2")), OUTPUT);
}
From source file:TestStringRelevance.java
License:Apache License
public static List<Tuple> getAllTuples(Tap tap) throws IOException {
    TapIterator it = new TapIterator(tap, new JobConf());
    List<Tuple> ret = new ArrayList<Tuple>();
    while (it.hasNext()) {
        // Copy the tuple: TapIterator reuses the same Tuple object on every call.
        Tuple t = new Tuple(it.next());
        ret.add(t);
    }
    return ret;
}
From source file:HadoopWordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");
    Path inputPath = new Path(HadoopWordCount.class.getClassLoader().getResource("books").getPath());
    Path outputPath = new Path(OUTPUT_PATH);

    // Set up the Hadoop job config: the input and output paths and formats.
    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(TextInputFormat.class);
    jobConfig.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(jobConfig, outputPath);
    TextInputFormat.addInputPath(jobConfig, inputPath);

    // Delete the output directory if it already exists.
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JetConfig cfg = new JetConfig();
    cfg.setInstanceConfig(new InstanceConfig()
            .setCooperativeThreadCount(Math.max(1, getRuntime().availableProcessors() / 2)));
    JetInstance jetInstance = Jet.newJetInstance(cfg);
    Jet.newJetInstance(cfg);

    try {
        System.out.print("\nCounting words from " + inputPath);
        long start = nanoTime();
        jetInstance.newJob(buildDag(jobConfig)).execute().get();
        System.out.print("Done in " + NANOSECONDS.toMillis(nanoTime() - start) + " milliseconds.");
        System.out.println("Output written to " + outputPath);
    } finally {
        Jet.shutdownAll();
    }
}
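Note that in this example the JobConf never reaches a Hadoop cluster: it only carries the input/output format and path configuration into the buildDag(...) helper (not shown), whose resulting DAG Hazelcast Jet executes in-process.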
From source file:TopFiveAverageMoviesRatedByFemales.java
public static void main(String[] args) throws Exception {
    // Job 1: join ratings with gender data and collect ratings made by females.
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);
    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        // Job 2: compute the average rating per movie from job 1's output.
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);
        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));
        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        // Job 3: join the averages with movie names and select the top five.
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);
        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));
        flag2 = job3.waitForCompletion(true);
    }
}
From source file:BytesRelevance.java
License:Apache License
@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        // Look up the configured key field. (The original passed the literal
        // string "keyField" rather than the parameter, which looks like a bug.)
        byte[] b = getBytes((BytesWritable) t.get(keyField));
        filter.add(b);
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));
}
From source file:avro.HadoopAvro.java
License:Open Source License
private JobConf createJobConfig() throws IOException {
    Path inputPath = new Path(INPUT_PATH);
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(AvroInputFormat.class);
    jobConfig.setOutputFormat(AvroOutputFormat.class);
    AvroOutputFormat.setOutputPath(jobConfig, outputPath);
    AvroInputFormat.addInputPath(jobConfig, inputPath);
    jobConfig.set(AvroJob.OUTPUT_SCHEMA, User.SCHEMA.toString());
    jobConfig.set(AvroJob.INPUT_SCHEMA, User.SCHEMA.toString());
    return jobConfig;
}
From source file:azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java
License:Apache License
private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        // A default JobConf picks up the cluster's file system settings from the classpath.
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}