Example usage for org.apache.hadoop.mapreduce Job setOutputValueClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) throws IllegalStateException 

Document

Set the value class for job outputs.
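The sketch below is a minimal, hypothetical driver showing where setOutputValueClass fits; it is not taken from the listings that follow, and WordCountMapper/WordCountReducer are placeholder classes assumed to emit Text keys and IntWritable values. As the prototype above indicates, the call must be made while the job is still being defined, otherwise it throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class OutputValueClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "output value class example");
        job.setJarByClass(OutputValueClassExample.class);

        // Hypothetical placeholders: substitute your own mapper and reducer
        // that emit Text keys and IntWritable values.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Declare the key/value types of the final (reducer) output.
        // If the map output types differ, set them separately with
        // setMapOutputKeyClass/setMapOutputValueClass.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}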

Usage

From source file:com.kk.hadoop.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);/*from   w ww .j  av  a2s.com*/
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(2);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.knewton.mapreduce.example.SSTableMRExample.java

License:Apache License

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException, ParseException {

    long startTime = System.currentTimeMillis();
    Options options = buildOptions();

    CommandLineParser cliParser = new BasicParser();
    CommandLine cli = cliParser.parse(options, args);
    if (cli.getArgs().length < 2 || cli.hasOption('h')) {
        printUsage(options);
    }
    Job job = getJobConf(cli);

    job.setJarByClass(SSTableMRExample.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(StudentEventWritable.class);

    job.setMapperClass(StudentEventMapper.class);
    job.setReducerClass(StudentEventReducer.class);

    job.setInputFormatClass(SSTableColumnInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // input arg
    String inputPaths = cli.getArgs()[0];
    LOG.info("Setting initial input paths to {}", inputPaths);
    SSTableInputFormat.addInputPaths(job, inputPaths);
    // output arg
    FileOutputFormat.setOutputPath(job, new Path(cli.getArgs()[1]));
    if (cli.hasOption('c')) {
        LOG.info("Using compression for output.");
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setCompressOutput(job, true);
    }
    job.waitForCompletion(true);
    LOG.info("Total runtime: {}s", (System.currentTimeMillis() - startTime) / 1000);
}

From source file:com.knewton.mrtool.example.JsonMRExample.java

License:Apache License

/**
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(new Configuration());

    job.setInputFormatClass(RecommendationsInputFormat.class);
    RecommendationsInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(RecommendationWritable.class);

    job.waitForCompletion(true);
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
     **    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Compress intermediate map output with Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract the number of nearest neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should find another way to implement the combiner class;
        //          the current implementation does not help the job.
        //          job.setCombinerClass(Combiner.class);

        //Use a single reduce task so the 100 nearest neighbors are kept in one sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * When the job finishes, read its output and store it in the results list.
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * When all jobs finish, store their results in the output/result.csv file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.laizuozuoba.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);
    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}

From source file:com.lakhani.anchorgraph.anchorgraph.java

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    //DistributedCache.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"), conf);
    conf.set("numberCentroids", args[3]);
    conf.set("numberFeatures", args[4]);
    Job job = new Job(conf, "anchorgraph");
    job.addCacheFile(new URI(args[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(anchorgraph.class);
    job.submit();
    int rc = (job.waitForCompletion(true)) ? 1 : 0;
    return rc;
}

From source file:com.lakhani.anchorgraph.testCache.java

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "testCache");
    job.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(testCache.class);
    job.submit();
    int rc = (job.waitForCompletion(true)) ? 1 : 0;
    return rc;
}

From source file:com.lakhani.anchorgraph.wordcount.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setJarByClass(wordcount.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

}

From source file:com.leon.hadoop.loganalyse.DistributedGrep.java

License:Open Source License

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.leon.hadoop.loganalyse.WordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}