Example usage for org.apache.hadoop.mapreduce Job setOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the value class for job outputs.

Usage

From source file:com.kk.hadoop.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);/*from   w ww .j  av  a2s.com*/
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(2);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.knewton.mapreduce.example.SSTableMRExample.java

License:Apache License

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException, ParseException {

    long startTime = System.currentTimeMillis();
    Options options = buildOptions();//from  w  w w.  java2  s . c om

    CommandLineParser cliParser = new BasicParser();
    CommandLine cli = cliParser.parse(options, args);
    if (cli.getArgs().length < 2 || cli.hasOption('h')) {
        printUsage(options);
    }
    Job job = getJobConf(cli);

    job.setJarByClass(SSTableMRExample.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(StudentEventWritable.class);

    job.setMapperClass(StudentEventMapper.class);
    job.setReducerClass(StudentEventReducer.class);

    job.setInputFormatClass(SSTableColumnInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // input arg
    String inputPaths = cli.getArgs()[0];
    LOG.info("Setting initial input paths to {}", inputPaths);
    SSTableInputFormat.addInputPaths(job, inputPaths);
    // output arg
    FileOutputFormat.setOutputPath(job, new Path(cli.getArgs()[1]));
    if (cli.hasOption('c')) {
        LOG.info("Using compression for output.");
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setCompressOutput(job, true);
    }
    job.waitForCompletion(true);
    LOG.info("Total runtime: {}s", (System.currentTimeMillis() - startTime) / 1000);
}

From source file:com.knewton.mrtool.example.JsonMRExample.java

License:Apache License

/**
 * //from w ww.j av  a2s.com
 * @param args
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(new Configuration());

    job.setInputFormatClass(RecommendationsInputFormat.class);
    RecommendationsInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(RecommendationWritable.class);

    job.waitForCompletion(true);
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
    /**    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.//from   w w  w.j  ava2 s  .  c o m
     * Befort Start, Check the Length of Sequence We Want to Predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should think another way to implement the combiner class
        //          Current Implementation is not helpful to Job.
        //          job.setCombinerClass(Combiner.class);

        //Set 1 for number of reduce task for keeping 100 most neighbors in sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.laizuozuoba.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from  w w w .ja v a 2 s. co m
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);
    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}

From source file:com.lakhani.anchorgraph.anchorgraph.java

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    //DistributedCache.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"), conf);
    conf.set("numberCentroids", args[3]);
    conf.set("numberFeatures", args[4]);
    Job job = new Job(conf, "anchorgraph");
    job.addCacheFile(new URI(args[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(anchorgraph.class);
    job.submit();/*from  www .  j  a  v a  2s .c  o m*/
    int rc = (job.waitForCompletion(true)) ? 1 : 0;
    return rc;
}

From source file:com.lakhani.anchorgraph.testCache.java

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "testCache");
    job.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(testCache.class);
    job.submit();/*  w w w .  jav  a2  s  . c o m*/
    int rc = (job.waitForCompletion(true)) ? 1 : 0;
    return rc;
}

From source file:com.lakhani.anchorgraph.wordcount.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setJarByClass(wordcount.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);//from w w  w  .  j  a  va2 s. co m

}

From source file:com.leon.hadoop.loganalyse.DistributedGrep.java

License:Open Source License

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);//  ww w. j  a  v a  2  s  . co  m
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.leon.hadoop.loganalyse.WordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}