Example usage for org.apache.hadoop.util GenericOptionsParser GenericOptionsParser

Introduction

This page collects example usages of the constructor org.apache.hadoop.util GenericOptionsParser GenericOptionsParser(Configuration, String[]).

Prototype

public GenericOptionsParser(Configuration conf, String[] args) throws IOException 

Document

Create a GenericOptionsParser to parse only the generic Hadoop arguments.
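
A minimal, self-contained sketch of the typical call pattern (the class name and sample arguments below are illustrative): the constructor applies generic Hadoop options such as -D key=value to the supplied Configuration, and getRemainingArgs() returns whatever is left for the application.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ParseGenericOptions {
    public static void main(String[] args) throws Exception {
        // e.g. args = {"-D", "mapreduce.job.reduces=5", "/in", "/out"}
        Configuration conf = new Configuration();
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        // the -D option has been applied to conf ...
        System.out.println("reduces = " + conf.get("mapreduce.job.reduces"));
        // ... and only the application-specific arguments remain
        for (String remaining : parser.getRemainingArgs()) {
            System.out.println("app arg: " + remaining);
        }
    }
}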

Usage

From source file:WordCount_SiCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

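    // apply generic Hadoop options (e.g. -D) to conf; only app arguments remain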
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);

    // enable the combiner for map-side aggregation
    job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartition.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:GetRetweetersAndCountPerUser.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "get retweeters and count per user");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
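        // after the job succeeds, scan its output files and tally how many
        // lines each user id (column 0) appears in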
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

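        // write the per-user totals to a sibling "<out>_count" file in HDFS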
        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}

From source file:WordCount_PerMapTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);

    // disable combiner
    // job.setCombinerClass(IntSumReducer.class);

    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:SiCombiner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    // use the custom partitioner
    job.setPartitionerClass(WordPartitioner.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:PerMapTally.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // use the custom partitioner
    job.setPartitionerClass(WordPartitioner.class);
    // combiner intentionally disabled: the mapper tallies counts per map task itself
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:ImportTsv.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    setConf(HBaseConfiguration.create(getConf()));
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        return -1;
    }

    // When MAPPER_CONF_KEY is null, the user wants to use the provided TsvImporterMapper, so
    // perform validation on these additional args. When it's not null, user has provided their
    // own mapper, thus these validation are not relevant.
    // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.
    if (null == getConf().get(MAPPER_CONF_KEY)) {
        // Make sure columns are specified
        String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);
        if (columns == null) {
            usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
            return -1;
        }

        // Make sure they specify exactly one column as the row key
        int rowkeysFound = 0;
        for (String col : columns) {
            if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC))
                rowkeysFound++;
        }
        if (rowkeysFound != 1) {
            usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
            return -1;
        }

        // Make sure we have at most one column as the timestamp key
        int tskeysFound = 0;
        for (String col : columns) {
            if (col.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC))
                tskeysFound++;
        }
        if (tskeysFound > 1) {
            usage("Must specify at most one column as " + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);
            return -1;
        }

        int attrKeysFound = 0;
        for (String col : columns) {
            if (col.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC))
                attrKeysFound++;
        }
        if (attrKeysFound > 1) {
            usage("Must specify at most one column as " + TsvParser.ATTRIBUTES_COLUMN_SPEC);
            return -1;
        }

        // Make sure one or more columns are specified excluding rowkey and
        // timestamp key
        if (columns.length - (rowkeysFound + tskeysFound + attrKeysFound) < 1) {
            usage("One or more columns in addition to the row key and timestamp(optional) are required");
            return -1;
        }
    }

    // If the timestamp option is not specified, use the current system time.
    long timestamp = getConf().getLong(TIMESTAMP_CONF_KEY, System.currentTimeMillis());

    // Write it back so an invalid (non-numeric) timestamp is replaced with the
    // current system time.
    getConf().setLong(TIMESTAMP_CONF_KEY, timestamp);

    Job job = createSubmittableJob(getConf(), otherArgs);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:ImageDuplicatesRemover.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Uncomment to force Hadoop to run locally:
    // conf.set("mapred.job.tracker", "local");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);/*from w ww  .  j  av  a 2  s .  c  om*/
    }
    Job job = new Job(conf, "image dups remover");
    job.setJarByClass(ImageDuplicatesRemover.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(ImageMd5Mapper.class);
    job.setReducerClass(ImageDupsReducer.class);
    //job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}

From source file:ReverseIndexer.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
        System.exit(2);
    }
    Job job = new Job(conf, "reverse indexer");
    job.setJarByClass(ReverseIndexer.class);
    job.setMapperClass(IndexerMapper.class);
    job.setReducerClass(IndexerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LineRecWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
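    // otherArgs[0] is the output path; every later argument is an input file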
    for (int i = 1; i < otherArgs.length; i++) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:Authset.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(Authset.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(10);
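    // all arguments except the last are input paths; the last is the output path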
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    // DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
    // DistributedCache.setLocalFiles(job.getConfiguration(), otherArgs[0]);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:RHBlockToKeyRangeIndex.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: rhblockindex <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "rhblockindex");
    job.setJarByClass(RHBlockToKeyRangeIndex.class);
    job.setMapperClass(RMapper.class);
    job.setCombinerClass(RReducer.class);
    job.setReducerClass(RReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}