Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects example usages of the no-argument constructor of org.apache.hadoop.mapred.JobConf.

Prototype

public JobConf() 

Document

Construct a map/reduce job configuration.
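
As a minimal sketch of the constructor in context (the job name and the /tmp paths below are hypothetical placeholders), a JobConf built with the no-argument constructor is typically populated with input/output formats, key/value classes, and paths, then submitted through the old mapred API; with the default IdentityMapper and IdentityReducer this simply copies text records through a map/reduce pass.

public static void main(String[] args) throws Exception {
    // Construct an empty map/reduce job configuration.
    JobConf conf = new JobConf();
    conf.setJobName("jobconf-sketch"); // hypothetical job name

    // Plain-text input and output formats from org.apache.hadoop.mapred.
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // With the default IdentityMapper/IdentityReducer the job emits the
    // TextInputFormat record types: byte offset and line text.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    // Hypothetical paths; replace with real HDFS locations.
    FileInputFormat.setInputPaths(conf, new Path("/tmp/jobconf-sketch/input"));
    FileOutputFormat.setOutputPath(conf, new Path("/tmp/jobconf-sketch/output"));

    // Submit the job and block until it completes.
    JobClient.runJob(conf);
}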

Usage

From source file: FormatStorageBasicTest.java

License: Open Source License

public void testToJobConf() {
    try {
        Head head = new Head();
        String key = "hello konten";
        head.setKey(key);

        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        head.setFieldMap(fieldMap);

        JobConf conf = new JobConf();
        head.toJobConf(conf);

        Head head2 = new Head();
        head2.fromJobConf(conf);

        if (head2.magic != head.magic) {
            fail("error magic:" + head2.magic);
        }
        if (head2.compress != head.compress) {
            fail("error compress:" + head2.compress);
        }
        if (head2.compressStyle != head.compressStyle) {
            fail("error compressStyle:" + head2.compressStyle);
        }
        if (head2.encode != head.encode) {
            fail("error encode:" + head2.encode);
        }
        if (head2.encodeStyle != head.encodeStyle) {
            fail("error encodeStyle:" + head2.encodeStyle);
        }
        if (!head2.key.equals(head.key)) {
            fail("error key:" + head2.key);
        }
        if (head2.primaryIndex != head.primaryIndex) {
            fail("error primary index:" + head2.primaryIndex);
        }
        if (head2.var != head.var) {
            fail("error var:" + head2.var);
        }
        if (head2.ver != head.ver) {
            fail("error ver:" + head2.ver);
        }

        if (head2.fieldMap.fieldNum() != head.fieldMap.fieldNum()) {
            fail("error fieldNum:" + head2.fieldMap.fieldNum());
        }

        Field f1 = head.fieldMap.getField((short) 0);
        Field f2 = head2.fieldMap.getField((short) 0);
        if (f1.type() != f2.type()) {
            fail("error type:" + f2.type());
        }
        if (f1.len() != f2.len()) {
            fail("error len:" + f2.len());
        }
        if (f1.index() != f2.index()) {
            fail("error index:" + f2.index());
        }

        f1 = head.fieldMap.getField((short) 1);
        f2 = head2.fieldMap.getField((short) 1);
        if (f1.type() != f2.type()) {
            fail("error type:" + f2.type());
        }
        if (f1.len() != f2.len()) {
            fail("error len:" + f2.len());
        }
        if (f1.index() != f2.index()) {
            fail("error index:" + f2.index());
        }
    } catch (Exception e) {
        fail("get exception:" + e.getMessage());
    }
}

From source file: RunText.java

License: Apache License

@Override
public void run() {
    try {
        JobConf job = new JobConf();
        job.setInputFormat(format.getClass());
        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, Reporter.NULL);
        Text value = reader.createValue();
        LongWritable key = reader.createKey();
        int count = 0;
        long t1 = System.nanoTime();
        while (reader.next(key, value)) {
            List<String> values = parse(value);
            if (values.get(index).equals(toFind)) {
                System.out.println(value);
            }
            count++;
            if (count == 100) {
                totalCount.addAndGet(100);
                count = 0;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        runningThreads.decrementAndGet();
    }
}

From source file: StringRelevance.java

License: Apache License

@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        String s = t.getString(keyField);
        filter.add(s.getBytes());
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));

}

From source file: TestStringRelevance.java

License: Apache License

@Override
public void setUp() throws Exception {
    fs.delete(new Path(INPUT), true);
    fs.delete(new Path(QUERY), true);
    fs.delete(new Path(OUTPUT), true);

    inputTap = new Hfs(new SequenceFile(new Fields("str1", "str2")), INPUT);
    TapCollector coll = new TapCollector(inputTap, new JobConf());
    coll.add(tuple1);
    coll.add(tuple2);
    coll.add(tuple3);
    coll.add(tuple4);
    coll.add(tuple5);
    coll.add(tuple6);
    coll.add(tuple7);
    coll.add(tuple8);
    coll.add(tuple9);
    coll.close();

    keyTap = new Hfs(new SequenceFile(new Fields("str")), QUERY);
    coll = new TapCollector(keyTap, new JobConf());
    coll.add(new Tuple(new Text("nathan@rapleaf.com")));
    coll.add(new Tuple(new Text("1@gmail.com")));
    coll.add(new Tuple(new Text("2@gmail.com")));
    coll.add(new Tuple(new Text("6@gmail.com")));
    coll.close();

    outputTap = new Hfs(new SequenceFile(new Fields("str1", "str2")), OUTPUT);
}

From source file: TestStringRelevance.java

License: Apache License

public static List<Tuple> getAllTuples(Tap tap) throws IOException {
    TapIterator it = new TapIterator(tap, new JobConf());
    List<Tuple> ret = new ArrayList<Tuple>();
    while (it.hasNext()) {
        Tuple t = new Tuple(it.next()); //need to copy it since TapIterator reuses the same tuple object
        ret.add(t);
    }
    return ret;
}

From source file: HadoopWordCount.java

License: Open Source License

public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    Path inputPath = new Path(HadoopWordCount.class.getClassLoader().getResource("books").getPath());
    Path outputPath = new Path(OUTPUT_PATH);

    // set up the Hadoop job config, the input and output paths and formats
    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(TextInputFormat.class);
    jobConfig.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(jobConfig, outputPath);
    TextInputFormat.addInputPath(jobConfig, inputPath);

    // Delete the output directory, if already exists
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JetConfig cfg = new JetConfig();
    cfg.setInstanceConfig(new InstanceConfig()
            .setCooperativeThreadCount(Math.max(1, getRuntime().availableProcessors() / 2)));

    JetInstance jetInstance = Jet.newJetInstance(cfg);
    Jet.newJetInstance(cfg);

    try {
        System.out.print("\nCounting words from " + inputPath);
        long start = nanoTime();
        jetInstance.newJob(buildDag(jobConfig)).execute().get();
        System.out.print("Done in " + NANOSECONDS.toMillis(nanoTime() - start) + " milliseconds.");
        System.out.println("Output written to " + outputPath);
    } finally {
        Jet.shutdownAll();
    }
}

From source file: TopFiveAverageMoviesRatedByFemales.java

public static void main(String[] args) throws Exception {
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);

    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);

    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");

        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);

        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);

        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));

        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");

        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);

        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);

        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));

        flag2 = job3.waitForCompletion(true);

    }
}

From source file: BytesRelevance.java

License: Apache License

@Override
protected void writeOutBloomFilter(Tap keysTap, String keyField, FileSystem fs, String path, int bloom_bits,
        int bloom_hashes) throws IOException {
    BytesBloomFilter filter = new BytesBloomFilter(bloom_bits, bloom_hashes);
    TupleEntryIterator it = new TupleEntryIterator(keysTap.getSourceFields(),
            new TapIterator(keysTap, new JobConf()));
    while (it.hasNext()) {
        TupleEntry t = it.next();
        byte[] b = getBytes((BytesWritable) t.get(keyField)); // use the keyField parameter, not the literal string "keyField"
        filter.add(b);
    }
    it.close();
    filter.writeToFileSystem(fs, new Path(path));

}

From source file: avro.HadoopAvro.java

License: Open Source License

private JobConf createJobConfig() throws IOException {
    Path inputPath = new Path(INPUT_PATH);
    Path outputPath = new Path(OUTPUT_PATH);

    FileSystem.get(new Configuration()).delete(outputPath, true);

    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(AvroInputFormat.class);
    jobConfig.setOutputFormat(AvroOutputFormat.class);
    AvroOutputFormat.setOutputPath(jobConfig, outputPath);
    AvroInputFormat.addInputPath(jobConfig, inputPath);
    jobConfig.set(AvroJob.OUTPUT_SCHEMA, User.SCHEMA.toString());
    jobConfig.set(AvroJob.INPUT_SCHEMA, User.SCHEMA.toString());
    return jobConfig;
}

From source file: azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java

License: Apache License

private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}