Example usage for org.apache.hadoop.mapreduce.lib.map RegexMapper PATTERN

List of usage examples for org.apache.hadoop.mapreduce.lib.map RegexMapper PATTERN

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.map RegexMapper PATTERN.

Prototype

String PATTERN

To view the source code for org.apache.hadoop.mapreduce.lib.map RegexMapper PATTERN, click the Source Link.

Usage

From source file:com.phantom.hadoop.examples.Grep.java

License:Apache License

/**
 * Runs a two-stage grep: a search job (RegexMapper with LongSumReducer as combiner and
 * reducer) writes its results to a temporary directory, then a sort job reads that
 * directory and writes a single output file ordered by decreasing frequency.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 0 when both jobs succeed, 1 when either job reports failure, 2 on a usage error
 * @throws Exception if job setup, submission, or cleanup of the temporary directory fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // Intermediate output of the search job; always removed in the finally block below.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job grepJob = Job.getInstance(conf);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure to the caller.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
}

From source file:io.aos.mapreduce.grep.GrepTool.java

License:Apache License

/**
 * Runs a two-stage grep: a search job (RegexMapper with LongSumReducer as combiner and
 * reducer) writes its results to a temporary directory, then a sort job reads that
 * directory and writes a single output file ordered by decreasing frequency.
 *
 * <p>Exceptions raised while configuring or running the jobs are reported on standard
 * error and turned into exit code 2 rather than propagated.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 0 when both jobs succeed, 1 when either job reports failure, 2 on a usage
 *         error or when an exception is caught
 * @throws Exception if cleanup of the temporary directory fails
 */
public int run(String[] args) throws Exception {

    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // Intermediate output of the search job; always removed in the finally block below.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);

    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    try {

        Job greJob = Job.getInstance(conf);
        greJob.setJobName("GrepSearch");

        FileInputFormat.setInputPaths(greJob, args[0]);

        greJob.setMapperClass(RegexMapper.class);
        greJob.setCombinerClass(LongSumReducer.class);
        greJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(greJob, tempDir);
        greJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        greJob.setOutputKeyClass(Text.class);
        greJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure to the caller.
        if (!greJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("GrepSort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        // Write a single file
        sortJob.setNumReduceTasks(1);

        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;

    } catch (Exception e) {
        // Restore the interrupt flag so callers further up can still observe it.
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        // The exit-code contract is kept, but the cause is no longer discarded silently.
        System.err.println("Grep failed: " + e);
        return 2;
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }

}

From source file:myGrep.Grep.java

License:Apache License

/**
 * Runs a single grep search job (myRegMapper with LongSumReducer as combiner and
 * reducer) and writes its SequenceFile output directly to the requested output
 * directory. No sort stage is run, so the output is not ordered by frequency.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 0 when the job succeeds, 1 when it reports failure, 2 on a usage error
 * @throws Exception if logging initialisation, job setup, or job submission fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    System.out.println("0:" + args[0] + " 1:" + args[1] + " 2:" + args[2]);

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    Job grepJob = Job.getInstance(conf);

    init_log();

    grepJob.setJobName("wzl-grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    // Project-specific mapper used in place of the stock RegexMapper.
    grepJob.setMapperClass(myRegMapper.class);

    // LongSumReducer serves as both combiner and reducer.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    // Output goes straight to <outDir>; no temporary directory is created, so there
    // is nothing to clean up afterwards.
    FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Surface job failure through the exit code instead of always returning 0.
    return grepJob.waitForCompletion(true) ? 0 : 1;
}

From source file:org.apache.hadoop.examples.Grep.java

License:Apache License

/**
 * Runs a two-stage grep: a search job (RegexMapper with LongSumReducer as combiner and
 * reducer) writes its results to a temporary directory, then a sort job reads that
 * directory and writes a single output file ordered by decreasing frequency.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 0 when both jobs succeed, 1 when either job reports failure, 2 on a usage error
 * @throws Exception if job setup, submission, or cleanup of the temporary directory fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // Intermediate output of the search job; always removed in the finally block below.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    Job grepJob = Job.getInstance(conf);

    try {
        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure to the caller.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");
        sortJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
}