Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setMapOutputKeyClass, collected from open-source projects.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException;

Document

Set the key class for the map output data. This allows the map output key class to differ from the final (reduce) output key class; if it is not set, the class given to setOutputKeyClass is used for the map output as well. The method throws IllegalStateException if the job has already been submitted.
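Before the project examples, a minimal self-contained sketch may help. It shows the situation setMapOutputKeyClass exists for: the map output types differ from the final output types, so the defaults taken from setOutputKeyClass/setOutputValueClass would be wrong. All class names and the toy line-length logic below are invented for illustration and do not come from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExampleDriver {

    // Emits (line length, line): the map output key is IntWritable.
    public static class ExampleMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws IOException, InterruptedException {
            context.write(new IntWritable(line.getLength()), line);
        }
    }

    // Emits (description, count): the final output key is Text, not IntWritable.
    public static class ExampleReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
        @Override
        protected void reduce(IntWritable length, Iterable<Text> lines, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (Text ignored : lines) {
                count++;
            }
            context.write(new Text("lines of length " + length.get()), new IntWritable(count));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "ExampleDriver");
        job.setJarByClass(ExampleDriver.class);

        job.setMapperClass(ExampleMapper.class);
        job.setReducerClass(ExampleReducer.class);

        // The map output types (IntWritable, Text) differ from the final
        // output types (Text, IntWritable), so they must be set explicitly;
        // without these calls they would default to the job's output classes.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that the map output key class must be a WritableComparable, because map output keys are sorted during the shuffle; the sort order itself can be overridden with setSortComparatorClass, as the SortPageRanks example below does.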

Usage

From source file: com.talis.labs.pagerank.mapreduce.CheckingData.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckingData <input path> <output path>");
        return -1;
    }

    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "CheckingData");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CheckingDataMapper.class);
    job.setReducerClass(CheckingDataReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.talis.labs.pagerank.mapreduce.CountPages.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CountPages <input path> <output path>");
        return -1;
    }

    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "CountPages");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CountPagesMapper.class);
    job.setCombinerClass(CountPagesReducer.class);
    job.setReducerClass(CountPagesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.talis.labs.pagerank.mapreduce.DanglingPages.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: DanglingPages <input path> <output path>");
        return -1;
    }

    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "DanglingPages");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(DanglingPagesMapper.class);
    job.setCombinerClass(DanglingPagesReducer.class);
    job.setReducerClass(DanglingPagesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.talis.labs.pagerank.mapreduce.InitializePageRanks.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: InitializePageRanks <input path> <output path> <number of pages>");
        return -1;
    }

    Configuration conf = getConf();
    conf.set("pagerank.count", args[2]);

    FileSystem.get(conf).delete(new Path(args[1]), true);

    Job job = new Job(conf, "InitializePageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(InitializePageRanksMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.talis.labs.pagerank.mapreduce.SortPageRanks.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: SortPageRanks <input path> <output path>");
        return -1;
    }

    FileSystem.get(getConf()).delete(new Path(args[1]), true);

    Job job = new Job(getConf(), "SortPageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SortPageRanksMapper.class);
    job.setReducerClass(Reducer.class); // i.e. identity reducer
    job.setSortComparatorClass(DoubleWritableDecreasingComparator.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(1); // TODO: inefficient, use InputSampler with v0.20.x

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.talis.labs.pagerank.mapreduce.UpdatePageRanks.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println(
                "Usage: UpdatePageRanks <input path> <output path> <number of pages> <dangling pages contribution>");
        return -1;
    }

    Configuration conf = getConf();
    conf.set("pagerank.count", args[2]);
    conf.set("pagerank.dangling", args[3]);

    FileSystem.get(conf).delete(new Path(args[1]), true);

    Job job = new Job(conf, "UpdatePageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(UpdatePageRanksMapper.class);
    job.setReducerClass(UpdatePageRanksReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.tdunning.plume.local.lazy.MapRedExecutor.java

License: Apache License

/**
 * This method returns a Job instance out of a {@link MSCR} entity. It puts the Class of 
 * the {@link PlumeWorkflow} argument and the MSCR id in the hadoop configuration.
 *
 * @param mscr The MSCR to convert 
 * @param workFlow The workflow whose class will be instantiated by hadoop mappers/reducers
 * @param workFlowOutputPath The temporary output path for the workflow (stored under TEMP_OUTPUT_PATH)
 * @param outputPath The output path of the MapRed job
 * @return A hadoop-executable MapRed Job
 * 
 * @throws IOException
 */
static Job getMapRed(final MSCR mscr, PlumeWorkflow workFlow, String workFlowOutputPath, String outputPath)
        throws IOException {

    Configuration conf = new Configuration();
    conf.set(WORKFLOW_NAME, workFlow.getClass().getName());
    conf.setInt(MSCR_ID, mscr.getId());
    conf.set(TEMP_OUTPUT_PATH, workFlowOutputPath);

    Job job = new Job(conf, "MSCR"); // TODO deprecation

    job.setMapOutputKeyClass(PlumeObject.class);
    job.setMapOutputValueClass(PlumeObject.class);

    job.setJarByClass(MapRedExecutor.class);

    /**
     * Define multiple inputs
     */
    for (PCollection<?> input : mscr.getInputs()) {
        if (!(input instanceof LazyCollection)) {
            throw new IllegalArgumentException("Can't create MapRed from MSCR whose inputs are not LazyTable");
        }
        LazyCollection<Text> l = (LazyCollection<Text>) input;
        if (!(l.isMaterialized() && l.getFile() != null)) {
            // Collections have plume ID only if they are intermediate results - TODO better naming for this
            if (l.getPlumeId().length() < 1) {
                throw new IllegalArgumentException(
                        "Can't create MapRed from MSCR inputs that are not materialized to a file");
            }
        }
        PCollectionType<?> rType = l.getType();
        Class<? extends InputFormat> format = SequenceFileInputFormat.class;
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            if (tType.valueType() instanceof StringType && tType.keyType() instanceof StringType) {
                format = KeyValueTextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        } else {
            if (rType.elementType() instanceof StringType) {
                format = TextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        }
    }
    /**
     * Define multiple outputs
     */
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
        PCollectionType<?> rType = ((LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output)
                .getType();
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (tType.keyType() instanceof StringType && tType.valueType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat,
                    getHadoopType(tType.keyType()), getHadoopType(tType.valueType()));
        } else {
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (rType.elementType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, NullWritable.class,
                    getHadoopType(rType.elementType()));
        }
    }
    /**
     * Define Reducer & Combiner
     */
    job.setCombinerClass(MSCRCombiner.class);
    job.setReducerClass(MSCRReducer.class);

    job.setNumReduceTasks(1);
    return job;
}

From source file: com.telefonica.iot.tidoop.mrlib.jobs.Filter.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String regex = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_REGEX, regex);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-filter");
    job.setNumReduceTasks(1);
    job.setJarByClass(Filter.class);
    job.setMapperClass(LineFilter.class);
    job.setCombinerClass(LinesCombiner.class);
    job.setReducerClass(LinesJoiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String mapFunction = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_FUNCTION, mapFunction);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly");
    job.setNumReduceTasks(0);
    job.setJarByClass(MapOnly.class);
    job.setMapperClass(CustomMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.teradata.compaction.mapreduce.MergeParquetFilesMR.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "MergeParquet");

    if (args.length != 2) {
        System.err.println("Usage: java -jar MergeParquetFilesMR path_to_input_folder path_to_output_folder ");
        System.exit(0);
    }

    final Path inputPath = new Path(args[0]);
    final Path out = new Path(args[1]);

    Schema schemaParquetFile = getBaseSchema(inputPath, conf);
    job.setJarByClass(MergeParquetFilesMR.class);
    job.setMapperClass(SampleParquetMapper.class);
    job.setReducerClass(SampleParquetReducer.class);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);

    AvroJob.setMapOutputValueSchema(job, schemaParquetFile);
    AvroParquetOutputFormat.setSchema(job, schemaParquetFile);
    FileInputFormat.addInputPath(job, inputPath);
    AvroParquetOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(1);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}