Example usage for org.apache.hadoop.mapreduce.lib.input FileInputFormat addInputPaths

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.lib.input FileInputFormat addInputPaths.

Prototype

public static void addInputPaths(Job job, String commaSeparatedPaths) throws IOException 

Document

Add the given comma-separated paths to the list of inputs for the map-reduce job.
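
Before the examples from real projects below, here is a minimal, hypothetical sketch of the call; the class name and the input and output paths are purely illustrative and assumed to exist on the cluster:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AddInputPathsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "addInputPaths-sketch");
        // Each comma-separated entry becomes one input path of the job.
        FileInputFormat.addInputPaths(job, "/data/logs/2015,/data/logs/2016");
        FileOutputFormat.setOutputPath(job, new Path("/data/out"));
        // Mapper, reducer and output types would be configured here before submission.
    }
}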

Usage

From source file: a.b.c.MultiFileWordCount.java

License: Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.ifeng.logparser.NginxLogDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    FileInputFormat.setInputDirRecursive(job, true);

    //FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    FileInputFormat.addInputPaths(job, args[0]);

    job.setMapperClass(NginxLogMapper.class);
    job.setReducerClass(NginxLogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.metamx.druid.indexer.path.StaticPathSpec.java

License: Open Source License

@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    log.info("Adding paths[%s]", paths);
    FileInputFormat.addInputPaths(job, paths);
    return job;
}

From source file: com.phantom.hadoop.examples.MultiFileWordCount.java

License: Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file: com.yourcompany.hadoop.mapreduce.aggregate.UnionDriver.java

License: Apache License

private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        }
    }
}

From source file: com.yourcompany.hadoop.mapreduce.hcatalog.HCatalogExampleDriver.java

License: Apache License

private void parseArguements(String[] args, Job job) throws IOException {

    String outputTableName = null;
    String dbName = null;
    String inputTableName = null;

    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-dbName".equals(args[i])) {
            dbName = args[++i];
        } else if ("-inputTableName".equals(args[i])) {
            inputTableName = args[++i];
        } else if ("-outputTableName".equals(args[i])) {
            outputTableName = args[++i];
        }
    }

    HCatInputFormat.setInput(job.getConfiguration(), dbName, inputTableName);
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job, s);
}

From source file: com.yourcompany.hadoop.mapreduce.KoreanWordcountDriver.java

License: Apache License

private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-exactMatch".equals(args[i])) {
            job.getConfiguration().set("exactMatch", args[++i]);
        } else if ("-bigrammable".equals(args[i])) {
            job.getConfiguration().set("bigrammable", args[++i]);
        } else if ("-hasOrigin".equals(args[i])) {
            job.getConfiguration().set("hasOrigin", args[++i]);
        } else if ("-originCNoun".equals(args[i])) {
            job.getConfiguration().set("originCNoun", args[++i]);
        } else if ("-reducer".equals(args[i])) {
            job.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else if ("-minSupport".equals(args[i])) {
            job.getConfiguration().set("minSupport", args[++i]);
        }
    }
}

From source file: com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java

License: Apache License

private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-indexmode".equals(args[i])) {
            job.getConfiguration().set("indexmode", args[++i]);
        } else if ("-reducer".equals(args[i])) {
            job.setNumReduceTasks(Integer.parseInt(args[++i]));
        }
    }
}

From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.ConfigurationHelper.java

License: Apache License

/**
 * Job configurator
 *
 * @param job                      job instance
 * @param jarByClass               class of the jar
 * @param mapperClass              mapper
 * @param reducerClass             reducer
 * @param commaSeparatedInputFiles input paths
 * @param outputPath               output
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, String commaSeparatedInputFiles, String outputPath)
        throws IOException {
    job.setJarByClass(jarByClass);
    job.setJobName(jarByClass.getName());

    // mapper
    job.setMapperClass(mapperClass);

    // reducer
    job.setReducerClass(reducerClass);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    // prevent producing empty files
    LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

    // intermediate data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // output data
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
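
For context, an invocation of this helper might look like the sketch below; MyWarcDriver, MyWarcMapper and MyWarcReducer are hypothetical classes (the mapper and reducer are assumed to emit Text keys and WARCWritable values, matching the intermediate types the helper sets), and the WARC paths are placeholders:

// Hypothetical driver/mapper/reducer classes and placeholder paths, for illustration only.
Job job = Job.getInstance(new Configuration());
ConfigurationHelper.configureJob(job, MyWarcDriver.class, MyWarcMapper.class, MyWarcReducer.class,
        "/data/cc/segment-00.warc.gz,/data/cc/segment-01.warc.gz", "/data/cc-filtered");
job.waitForCompletion(true);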

From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.examples.SimpleTextSearch.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf);
    job.setJarByClass(SimpleTextSearch.class);

    job.setJobName(SimpleTextSearch.class.getName());

    // mapper
    job.setMapperClass(TextSearchMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    // regex with a phrase to be searched for
    String regex = otherArgs[2];
    job.getConfiguration().set(MAPREDUCE_MAP_REGEX, regex);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}