Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

This page collects example usages of the setMapOutputKeyClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the map output data.

Usage

From source file:com.talis.labs.pagerank.mapreduce.CheckingData.java

License:Apache License

/**
 * Configures and runs the "CheckingData" MapReduce job.
 *
 * <p>The output path is deleted recursively before the job is submitted.
 *
 * @param args exactly two elements: the input path and the output path
 * @return {@code -1} if the number of arguments is wrong, {@code 0} if
 *         {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if deleting the output path or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckingData <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself instead of using the default
    // one, so that a fully-qualified output URI on a non-default file system is accepted.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "CheckingData");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CheckingDataMapper.class);
    job.setReducerClass(CheckingDataReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.CountPages.java

License:Apache License

/**
 * Configures and runs the "CountPages" MapReduce job with a single reduce task.
 *
 * <p>The output path is deleted recursively before the job is submitted. The same
 * class, {@code CountPagesReducer}, is registered as both combiner and reducer.
 *
 * @param args exactly two elements: the input path and the output path
 * @return {@code -1} if the number of arguments is wrong, {@code 0} if
 *         {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if deleting the output path or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CountPages <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself instead of using the default
    // one, so that a fully-qualified output URI on a non-default file system is accepted.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "CountPages");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CountPagesMapper.class);
    job.setCombinerClass(CountPagesReducer.class);
    job.setReducerClass(CountPagesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.DanglingPages.java

License:Apache License

/**
 * Configures and runs the "DanglingPages" MapReduce job with a single reduce task.
 *
 * <p>The output path is deleted recursively before the job is submitted. The same
 * class, {@code DanglingPagesReducer}, is registered as both combiner and reducer.
 *
 * @param args exactly two elements: the input path and the output path
 * @return {@code -1} if the number of arguments is wrong, {@code 0} if
 *         {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if deleting the output path or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: DanglingPages <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself instead of using the default
    // one, so that a fully-qualified output URI on a non-default file system is accepted.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "DanglingPages");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(DanglingPagesMapper.class);
    job.setCombinerClass(DanglingPagesReducer.class);
    job.setReducerClass(DanglingPagesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.InitializePageRanks.java

License:Apache License

/**
 * Configures and runs the "InitializePageRanks" MapReduce job.
 *
 * <p>The third argument is stored in the configuration under {@code pagerank.count}
 * before the job is created. The output path is deleted recursively before the job
 * is submitted. Only a mapper class is registered; no reducer class is set here.
 *
 * @param args exactly three elements: the input path, the output path and the
 *             number of pages
 * @return {@code -1} if the number of arguments is wrong, {@code 0} if
 *         {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if deleting the output path or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: InitializePageRanks <input path> <output path> <number of pages>");
        return -1;
    }

    Configuration conf = getConf();
    conf.set("pagerank.count", args[2]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself instead of using the default
    // one, so that a fully-qualified output URI on a non-default file system is accepted.
    output.getFileSystem(conf).delete(output, true);

    Job job = new Job(conf, "InitializePageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(InitializePageRanksMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // No reducer class is registered, so the map output types are also the types of the
    // final records; declare them explicitly instead of relying on the job defaults.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.SortPageRanks.java

License:Apache License

/**
 * Configures and runs the "SortPageRanks" MapReduce job with a single reduce task.
 *
 * <p>Map output keys are {@code DoubleWritable} values ordered by
 * {@code DoubleWritableDecreasingComparator}; the base {@code Reducer} class is used
 * as an identity reducer. The output path is deleted recursively before the job is
 * submitted.
 *
 * @param args exactly two elements: the input path and the output path
 * @return {@code -1} if the number of arguments is wrong, {@code 0} if
 *         {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if deleting the output path or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: SortPageRanks <input path> <output path>");
        return -1;
    }

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself instead of using the default
    // one, so that a fully-qualified output URI on a non-default file system is accepted.
    output.getFileSystem(getConf()).delete(output, true);

    Job job = new Job(getConf(), "SortPageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(SortPageRanksMapper.class);
    job.setReducerClass(Reducer.class); // i.e. identity reducer
    job.setSortComparatorClass(DoubleWritableDecreasingComparator.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);
    // The identity reducer forwards the map output records unchanged, so the final
    // key type is the map output key type (DoubleWritable), not Text.
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(1); // TODO: inefficient, use InputSampler with v0.20.x

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.talis.labs.pagerank.mapreduce.UpdatePageRanks.java

License:Apache License

/**
 * Configures and runs the "UpdatePageRanks" MapReduce job.
 *
 * <p>The third and fourth arguments are stored in the configuration under
 * {@code pagerank.count} and {@code pagerank.dangling} before the job is created.
 * The output path is deleted recursively before the job is submitted.
 *
 * @param args exactly four elements: the input path, the output path, the number of
 *             pages and the dangling pages contribution
 * @return {@code -1} if the number of arguments is wrong, {@code 0} if
 *         {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if deleting the output path or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println(
                "Usage: UpdatePageRanks <input path> <output path> <number of pages> <dangling pages contribution>");
        return -1;
    }

    Configuration conf = getConf();
    conf.set("pagerank.count", args[2]);
    conf.set("pagerank.dangling", args[3]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    // Resolve the file system from the output path itself instead of using the default
    // one, so that a fully-qualified output URI on a non-default file system is accepted.
    output.getFileSystem(conf).delete(output, true);

    Job job = new Job(conf, "UpdatePageRanks");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(UpdatePageRanksMapper.class);
    job.setReducerClass(UpdatePageRanksReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java

License:Apache License

/**
 * This method returns a Job instance out of a {@link MSCR} entity. It puts the Class of 
 * the {@link PlumeWorkflow} argument and the MSCR id in the hadoop configuration.
 * /*from   www  .ja  v  a 2  s.co m*/
 * @param mscr The MSCR to convert 
 * @param workflow The workflow whose class will be instantiated by hadoop mappers/reducers
 * @param outputPath The output path of the MapRed job
 * @return A hadoop-executable MapRed Job
 * 
 * @throws IOException
 */
static Job getMapRed(final MSCR mscr, PlumeWorkflow workFlow, String workFlowOutputPath, String outputPath)
        throws IOException {

    Configuration conf = new Configuration();
    conf.set(WORKFLOW_NAME, workFlow.getClass().getName());
    conf.setInt(MSCR_ID, mscr.getId());
    conf.set(TEMP_OUTPUT_PATH, workFlowOutputPath);

    Job job = new Job(conf, "MSCR"); // TODO deprecation

    job.setMapOutputKeyClass(PlumeObject.class);
    job.setMapOutputValueClass(PlumeObject.class);

    job.setJarByClass(MapRedExecutor.class);

    /**
     * Define multiple inputs
     */
    for (PCollection<?> input : mscr.getInputs()) {
        if (!(input instanceof LazyCollection)) {
            throw new IllegalArgumentException("Can't create MapRed from MSCR whose inputs are not LazyTable");
        }
        LazyCollection<Text> l = (LazyCollection<Text>) input;
        if (!(l.isMaterialized() && l.getFile() != null)) {
            // Collections have plume ID only if they are intermediate results - TODO better naming for this
            if (l.getPlumeId().length() < 1) {
                throw new IllegalArgumentException(
                        "Can't create MapRed from MSCR inputs that are not materialized to a file");
            }
        }
        PCollectionType<?> rType = l.getType();
        Class<? extends InputFormat> format = SequenceFileInputFormat.class;
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            if (tType.valueType() instanceof StringType && tType.keyType() instanceof StringType) {
                format = KeyValueTextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        } else {
            if (rType.elementType() instanceof StringType) {
                format = TextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        }
    }
    /**
     * Define multiple outputs
     */
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
        PCollectionType<?> rType = ((LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output)
                .getType();
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (tType.keyType() instanceof StringType && tType.valueType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat,
                    getHadoopType(tType.keyType()), getHadoopType(tType.valueType()));
        } else {
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (rType.elementType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, NullWritable.class,
                    getHadoopType(rType.elementType()));
        }
    }
    /**
     * Define Reducer & Combiner
     */
    job.setCombinerClass(MSCRCombiner.class);
    job.setReducerClass(MSCRReducer.class);

    job.setNumReduceTasks(1);
    return job;
}

From source file:com.telefonica.iot.tidoop.mrlib.jobs.Filter.java

License:Open Source License

/**
 * Configures and runs the "tidoop-mr-lib-filter" MapReduce job with a single reduce task.
 *
 * <p>The third argument is stored in the configuration under {@code Constants.PARAM_REGEX}
 * before the job is created.
 *
 * @param args exactly three elements: the input path, the output path and the regex
 * @return {@code -1} if the number of arguments is wrong (after showing the usage),
 *         {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    // exactly three arguments are expected; otherwise show the usage and give up
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // name the positional arguments
    final String inputDir = args[0];
    final String outputDir = args[1];
    final String filterRegex = args[2];

    // the regex is put in the configuration before the job is built from it
    Configuration jobConf = this.getConf();
    jobConf.set(Constants.PARAM_REGEX, filterRegex);
    Job filterJob = Job.getInstance(jobConf, "tidoop-mr-lib-filter");

    // classes taking part in the job
    filterJob.setJarByClass(Filter.class);
    filterJob.setMapperClass(LineFilter.class);
    filterJob.setCombinerClass(LinesCombiner.class);
    filterJob.setReducerClass(LinesJoiner.class);
    filterJob.setNumReduceTasks(1);

    // intermediate and final record types
    filterJob.setMapOutputKeyClass(Text.class);
    filterJob.setMapOutputValueClass(Text.class);
    filterJob.setOutputKeyClass(NullWritable.class);
    filterJob.setOutputValueClass(Text.class);

    // input and output locations
    FileInputFormat.addInputPath(filterJob, new Path(inputDir));
    FileOutputFormat.setOutputPath(filterJob, new Path(outputDir));

    // run the job and translate its outcome into an exit code
    if (filterJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java

License:Open Source License

/**
 * Configures and runs the "tidoop-mr-lib-maponly" MapReduce job with zero reduce tasks.
 *
 * <p>The third argument is stored in the configuration under
 * {@code Constants.PARAM_FUNCTION} before the job is created.
 *
 * @param args exactly three elements: the input path, the output path and the map function
 * @return {@code -1} if the number of arguments is wrong (after showing the usage),
 *         {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    // exactly three arguments are expected; otherwise show the usage and give up
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // name the positional arguments
    final String inputDir = args[0];
    final String outputDir = args[1];
    final String functionSpec = args[2];

    // the map function is put in the configuration before the job is built from it
    Configuration jobConf = this.getConf();
    jobConf.set(Constants.PARAM_FUNCTION, functionSpec);
    Job mapOnlyJob = Job.getInstance(jobConf, "tidoop-mr-lib-maponly");

    // classes taking part in the job; there is no reduce phase
    mapOnlyJob.setJarByClass(MapOnly.class);
    mapOnlyJob.setMapperClass(CustomMapper.class);
    mapOnlyJob.setNumReduceTasks(0);

    // intermediate and final record types
    mapOnlyJob.setMapOutputKeyClass(NullWritable.class);
    mapOnlyJob.setMapOutputValueClass(Text.class);
    mapOnlyJob.setOutputKeyClass(NullWritable.class);
    mapOnlyJob.setOutputValueClass(Text.class);

    // input and output locations
    FileInputFormat.addInputPath(mapOnlyJob, new Path(inputDir));
    FileOutputFormat.setOutputPath(mapOnlyJob, new Path(outputDir));

    // run the job and translate its outcome into an exit code
    if (mapOnlyJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java

License:Apache License

/**
 * Command-line entry point of the "MergeParquet" MapReduce job.
 *
 * <p>The schema returned by {@code getBaseSchema} for the input folder is used both as the
 * map output value schema and as the schema of the Parquet output. The job runs with a
 * single reduce task, and the JVM exits with {@code 0} if {@code waitForCompletion}
 * reports success and {@code 1} otherwise.
 *
 * @param args exactly two elements: the path to the input folder and the path to the
 *             output folder; otherwise the usage is printed and the JVM exits with status 2
 * @throws Exception if reading the schema, configuring the job or running it fails
 */
public static void main(String[] args) throws Exception {
    // Validate the arguments before doing any work. A usage error must not exit with
    // status 0, which calling scripts would interpret as success.
    if (args.length != 2) {
        System.err.println("Usage: java -jar MergeParquetFilesMR path_to_input_folder path_to_output_folder ");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = new Job(conf, "MergeParquet");

    final Path inputPath = new Path(args[0]);
    final Path out = new Path(args[1]);

    Schema schemaParquetFile = getBaseSchema(inputPath, conf);
    job.setJarByClass(MergeParquetFilesMR.class);
    job.setMapperClass(SampleParquetMapper.class);
    job.setReducerClass(SampleParquetReducer.class);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);

    AvroJob.setMapOutputValueSchema(job, schemaParquetFile);
    AvroParquetOutputFormat.setSchema(job, schemaParquetFile);
    FileInputFormat.addInputPath(job, inputPath);
    AvroParquetOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(1);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}