Example usage for org.apache.hadoop.mapred FileInputFormat setInputPathFilter

List of usage examples for org.apache.hadoop.mapred FileInputFormat setInputPathFilter

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.FileInputFormat.setInputPathFilter.

Prototype

public static void setInputPathFilter(JobConf conf, Class<? extends PathFilter> filter) 

Source Link

Document

Set a PathFilter to be applied to the input paths for the map-reduce job.

Usage

From source file: edu.ub.ahstfg.kmeans.KmeansIteration.java

License: Open Source License

/**
 * Configures and runs one Kmeans iteration as a map-reduce job.
 *
 * <p>The job configuration is rebuilt from {@code getConf()} on every call,
 * populated from {@code params}, and tagged with the current iteration
 * number. Input is read from {@code inputPath} with {@code SampleFilter}
 * registered as the input path filter; any existing content at
 * {@code outputPath} is deleted recursively before the job is submitted.
 *
 * @param args command-line arguments; not read by this method
 * @return {@code 0} if the submitted job reports success, {@code 1} otherwise
 * @throws IOException if one of the file system or job calls fails
 * @throws URISyntaxException if {@code outputPath} cannot be parsed as a URI
 */
@Override
public int run(String[] args) throws IOException, URISyntaxException {
    // Fresh configuration for this iteration.
    job = new JobConf(getConf());
    params.toJobConf(job);
    job.setInt(ParamSet.N_ITERATION, nIter);

    LOG.info("Iteration " + nIter + " > Iniciating Kmeans iteration " + nIter);
    job.setJarByClass(KmeansIteration.class); // TODO may change for Clusterizer.class

    // Input location and the filter applied to the input paths.
    LOG.info("Iteration " + nIter + " > Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileInputFormat.setInputPathFilter(job, SampleFilter.class);

    // Remove whatever is already at the output location.
    LOG.info("Iteration " + nIter + " > Clearing the output path at '" + outputPath + "'");
    FileSystem outputFs = FileSystem.get(new URI(outputPath), job);
    Path outputDir = new Path(outputPath);
    if (outputFs.exists(outputDir)) {
        outputFs.delete(outputDir, true);
    }

    // Output location, written uncompressed.
    LOG.info("Iteration " + nIter + " > Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, outputDir);
    FileOutputFormat.setCompressOutput(job, false);

    // Input and output formats.
    LOG.info("Iteration " + nIter + " > Setting input format.");
    job.setInputFormat(IndexInputFormat.class);
    LOG.info("Iteration " + nIter + " > Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);

    // Key/value types of the final job output.
    LOG.info("Iteration " + nIter + " > Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Mapper, reducer and the intermediate key/value types between them.
    LOG.info("Iteration " + nIter + " > Setting mapper and reducer.");
    job.setMapperClass(KmeansMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentDistance.class);
    job.setReducerClass(KmeansReducer.class);

    // Submit the job and translate its outcome into an exit code.
    LOG.info("Iteration " + nIter + " > Running job...");
    boolean succeeded = JobClient.runJob(job).isSuccessful();
    LOG.info("Iteration " + nIter + " > Job done.");
    return succeeded ? 0 : 1;
}