Usage examples for the org.apache.hadoop.util.GenericOptionsParser constructor GenericOptionsParser(Configuration, String[])
public GenericOptionsParser(Configuration conf, String[] args) throws IOException
Creates a GenericOptionsParser to parse only the generic Hadoop arguments.
Usage
From source file:WordCount_SiCombiner.java
License:Apache License
/**
 * Entry point: configures and submits the "word count" job with a combiner,
 * a custom partitioner and five reduce tasks.
 *
 * <p>Exits with status 2 when the positional arguments are not exactly
 * {@code <in> <out>}, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_SiCombiner.class);

    // Processing pipeline: mapper -> combiner -> partitioner -> reducer.
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartition.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(5);

    // Types of the final output records.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(job, new Path(remaining[1]));

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:GetRetweetersAndCountPerUser.java
License:Apache License
/**
 * Entry point: runs the MapReduce job and, on success, post-processes its
 * output into a tally file.
 *
 * <p>Expected positional arguments (after generic Hadoop options have been
 * stripped by {@link GenericOptionsParser}): {@code <in> <out> <num_reducers>}.
 *
 * <p>After a successful job, every {@code part-r-*} file in {@code <out>} is
 * read line by line; the first tab-separated column is parsed as an integer
 * key and the number of lines seen per key is written, as
 * {@code key<TAB>count} lines, to the path {@code <out>_count}.
 *
 * <p>Exits with status 2 on a wrong argument count, 0 when the job and the
 * post-processing succeed, and 1 when the job fails.
 *
 * @param args command-line arguments, possibly including generic Hadoop options
 * @throws Exception if option parsing, job execution or HDFS I/O fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    // Always index into otherArgs, never args: when generic options such as
    // -D key=value are present, the positions in the raw args array are shifted.
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        try {
            Path dir = new Path(otherArgs[1]);
            // Only the reducer output files are of interest.
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().startsWith("part-r-");
                }
            };
            HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
            FileStatus[] files = hdfs.listStatus(dir, filter);
            Arrays.sort(files);
            for (int i = 0; i != files.length; i++) {
                Path pt = files[i].getPath();
                BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
                try {
                    String line = null;
                    while ((line = br.readLine()) != null) {
                        String[] columns = line.split("\t");
                        // NOTE(review): the tally is keyed on column 0. If each key occurs
                        // only once in the reducer output, every count will be 1 - confirm
                        // that column 0 (not column 1) is the intended column.
                        int key = Integer.parseInt(columns[0]);
                        if (counts_for_user.containsKey(key))
                            counts_for_user.put(key, counts_for_user.get(key) + 1);
                        else
                            counts_for_user.put(key, 1);
                    }
                } finally {
                    // Release the HDFS stream even if a line fails to parse.
                    br.close();
                }
            }
            FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
            PrintWriter writer = new PrintWriter(fsDataOutputStream);
            try {
                for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
                    writer.write(e.getKey() + "\t" + e.getValue() + "\n");
                }
            } finally {
                // Closing the writer flushes it and closes the wrapped HDFS stream.
                writer.close();
            }
        } finally {
            hdfs.close();
        }
        System.exit(0);
    }
    System.exit(1);
}
From source file:WordCount_PerMapTally.java
License:Apache License
/**
 * Entry point: configures and submits the "word count" job without a
 * combiner, using a custom partitioner and five reduce tasks.
 *
 * <p>Exits with status 2 when the positional arguments are not exactly
 * {@code <in> <out>}, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerMapTally.class);

    // No combiner is registered for this variant.
    job.setMapperClass(TokenizerMapper.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(5);

    // Types of the final output records.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(job, new Path(remaining[1]));

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:SiCombiner.java
License:Apache License
/**
 * Entry point: configures and submits the "word count" job with both a
 * custom partitioner and a combiner.
 *
 * <p>Exits with status 2 when the positional arguments are not exactly
 * {@code <in> <out>}, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remaining = parser.getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);

    job.setMapperClass(TokenizerMapper.class);
    // Route map output through the custom partitioner.
    job.setPartitionerClass(WordPartitioner.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // Types of the final output records.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(job, new Path(remaining[1]));

    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
}
From source file:PerMapTally.java
License:Apache License
/**
 * Entry point: configures and submits the "word count" job with a custom
 * partitioner and no combiner.
 *
 * <p>Exits with status 2 when the positional arguments are not exactly
 * {@code <in> <out>}, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remaining = parser.getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);

    job.setMapperClass(TokenizerMapper.class);
    // Route map output through the custom partitioner.
    job.setPartitionerClass(WordPartitioner.class);
    // The combiner is intentionally left unset in this variant.
    job.setReducerClass(IntSumReducer.class);

    // Types of the final output records.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(job, new Path(remaining[1]));

    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
}
From source file:ImportTsv.java
License:Apache License
/**
 * Validates the command line and configuration, then builds and runs the
 * import job.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} and at least two positional
 *             arguments must remain
 * @return 0 if the job completes successfully, 1 if it fails, -1 if the
 *         arguments or column configuration are rejected
 * @throws Exception if option parsing, job creation or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    setConf(HBaseConfiguration.create(getConf()));
    String[] remaining = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (remaining.length < 2) {
        usage("Wrong number of arguments: " + remaining.length);
        return -1;
    }

    // The column checks below apply only when no custom mapper is configured,
    // i.e. when the bundled TsvImporterMapper will be used. A user-supplied
    // mapper makes them irrelevant.
    // TODO: validation for TsvImporterMapper, not this tool. Move elsewhere.
    boolean usingDefaultMapper = getConf().get(MAPPER_CONF_KEY) == null;
    if (usingDefaultMapper) {
        String[] columns = getConf().getStrings(COLUMNS_CONF_KEY);
        if (columns == null) {
            usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
            return -1;
        }

        // Tally the special column specs in a single pass.
        int rowKeyCount = 0;
        int timestampKeyCount = 0;
        int attributesKeyCount = 0;
        for (String column : columns) {
            if (column.equals(TsvParser.ROWKEY_COLUMN_SPEC)) {
                rowKeyCount++;
            }
            if (column.equals(TsvParser.TIMESTAMPKEY_COLUMN_SPEC)) {
                timestampKeyCount++;
            }
            if (column.equals(TsvParser.ATTRIBUTES_COLUMN_SPEC)) {
                attributesKeyCount++;
            }
        }

        // Exactly one row key column is required.
        if (rowKeyCount != 1) {
            usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
            return -1;
        }
        // The timestamp key column is optional but may appear at most once.
        if (timestampKeyCount > 1) {
            usage("Must specify at most one column as " + TsvParser.TIMESTAMPKEY_COLUMN_SPEC);
            return -1;
        }
        // Likewise for the attributes column.
        if (attributesKeyCount > 1) {
            usage("Must specify at most one column as " + TsvParser.ATTRIBUTES_COLUMN_SPEC);
            return -1;
        }
        // At least one ordinary column must remain once the special ones are excluded.
        int specialColumns = rowKeyCount + timestampKeyCount + attributesKeyCount;
        if (columns.length - specialColumns < 1) {
            usage("One or more columns in addition to the row key and timestamp(optional) are required");
            return -1;
        }
    }

    // Fall back to the current system time when no timestamp is configured,
    // and write the resolved value back so the configuration always carries one.
    long timestamp = getConf().getLong(TIMESTAMP_CONF_KEY, System.currentTimeMillis());
    getConf().setLong(TIMESTAMP_CONF_KEY, timestamp);

    Job job = createSubmittableJob(getConf(), remaining);
    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
From source file:ImageDuplicatesRemover.java
License:Apache License
/**
 * Entry point: configures and submits the "image dups remover" job, which
 * reads its input through {@code SequenceFileInputFormat}.
 *
 * <p>Exits with status 2 when the positional arguments are not exactly
 * {@code <in> <out>}, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // To run Hadoop locally, uncomment:
    // conf.set("mapred.job.tracker", "local");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        // Name this tool in the usage text rather than the copied "wordcount".
        System.err.println("Usage: ImageDuplicatesRemover <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "image dups remover");
    job.setJarByClass(ImageDuplicatesRemover.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(ImageMd5Mapper.class);
    job.setReducerClass(ImageDupsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:ReverseIndexer.java
License:Apache License
/**
 * Entry point: configures and submits the "reverse indexer" job.
 *
 * <p>The first positional argument is the output path; every following
 * argument is added as an input path. Exits with status 2 when fewer than
 * two positional arguments are given, 0 when the job succeeds and 1 when it
 * fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remaining = parser.getRemainingArgs();
    if (remaining.length < 2) {
        System.err.println("Usage: ReverseIndexer <output> <input file(s)>");
        System.exit(2);
    }

    Job job = new Job(conf, "reverse indexer");
    job.setJarByClass(ReverseIndexer.class);
    job.setMapperClass(IndexerMapper.class);
    job.setReducerClass(IndexerReducer.class);

    // Intermediate (map output) record types differ from the final ones.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LineRecWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Index 0 is the output path; everything after it is an input.
    int index = 1;
    while (index < remaining.length) {
        FileInputFormat.addInputPath(job, new Path(remaining[index]));
        index++;
    }
    FileOutputFormat.setOutputPath(job, new Path(remaining[0]));

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:Authset.java
License:Apache License
/**
 * Entry point: configures and submits the job with ten reduce tasks and no
 * combiner.
 *
 * <p>The last positional argument is the output path; every argument before
 * it is added as an input path. Exits with status 2 when fewer than two
 * positional arguments are given, 0 when the job succeeds and 1 when it
 * fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remaining = parser.getRemainingArgs();
    if (remaining.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(Authset.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(10);

    // Map output records are (IntWritable, IntWritable); final records are
    // (IntWritable, NullWritable).
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // All arguments but the last are inputs; the last is the output path.
    int outputIndex = remaining.length - 1;
    for (int i = 0; i < outputIndex; i++) {
        FileInputFormat.addInputPath(job, new Path(remaining[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(remaining[outputIndex]));

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:RHBlockToKeyRangeIndex.java
License:Apache License
/**
 * Entry point: configures and submits the "rhblockindex" job, using the same
 * class as combiner and reducer.
 *
 * <p>Exits with status 2 when the positional arguments are not exactly
 * {@code <in> <out>}, 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; generic Hadoop options are consumed
 *             by {@link GenericOptionsParser} before the positional check
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remaining = parser.getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: rhblockindex <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "rhblockindex");
    job.setJarByClass(RHBlockToKeyRangeIndex.class);

    job.setMapperClass(RMapper.class);
    job.setCombinerClass(RReducer.class);
    job.setReducerClass(RReducer.class);

    // Types of the final output records.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(job, new Path(remaining[1]));

    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
}