Example usage for org.apache.hadoop.mapred FileInputFormat setInputPathFilter

Introduction

This page shows example usage of the setInputPathFilter method of org.apache.hadoop.mapred.FileInputFormat.

Prototype

public static void setInputPathFilter(JobConf conf, Class<? extends PathFilter> filter)

Source Link

Document

Set a PathFilter to be applied to the input paths for the map-reduce job.

Usage

From source file: edu.ub.ahstfg.kmeans.KmeansIteration.java

License: Open Source License

/**
 * Configures and runs one K-means iteration as a map-reduce job.
 *
 * <p>The method builds a fresh {@code JobConf}, points it at {@code inputPath}
 * (filtered through {@code SampleFilter}), removes any existing content at
 * {@code outputPath}, wires the input/output formats, key/value types, mapper and
 * reducer, and then runs the job synchronously.
 *
 * @param args command-line arguments; not read by this method
 * @return {@code 0} if the job reports success, {@code 1} otherwise
 * @throws IOException if an existing output path cannot be deleted, or if one of
 *         the file-system or job calls made here fails
 * @throws URISyntaxException if {@code outputPath} is not a valid URI
 */
@Override
public int run(String[] args) throws IOException, URISyntaxException {
    job = new JobConf(getConf());
    params.toJobConf(job);
    job.setInt(ParamSet.N_ITERATION, nIter);

    // Every progress message of this iteration carries the same prefix.
    final String logPrefix = "Iteration " + nIter + " > ";

    LOG.info(logPrefix + "Iniciating Kmeans iteration " + nIter);
    job.setJarByClass(KmeansIteration.class); // TODO may change for Clusterizer.class

    LOG.info(logPrefix + "Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileInputFormat.setInputPathFilter(job, SampleFilter.class);

    LOG.info(logPrefix + "Clearing the output path at '" + outputPath + "'");
    final Path outPath = new Path(outputPath);
    FileSystem fs = FileSystem.get(new URI(outputPath), job);

    // Fail right here if the stale output cannot be removed, instead of letting the
    // problem surface later as a less specific error when the job is submitted.
    if (fs.exists(outPath) && !fs.delete(outPath, true)) {
        throw new IOException(logPrefix + "Could not clear the output path at '" + outputPath + "'");
    }

    LOG.info(logPrefix + "Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, outPath);
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info(logPrefix + "Setting input format.");
    job.setInputFormat(IndexInputFormat.class);
    LOG.info(logPrefix + "Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);

    LOG.info(logPrefix + "Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // The map output types differ from the final output types set above,
    // so they are declared explicitly.
    LOG.info(logPrefix + "Setting mapper and reducer.");
    job.setMapperClass(KmeansMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentDistance.class);
    job.setReducerClass(KmeansReducer.class);

    LOG.info(logPrefix + "Running job...");
    boolean done = JobClient.runJob(job).isSuccessful();
    LOG.info(logPrefix + "Job done.");

    // Exit-code convention: 0 on success, 1 on failure.
    return done ? 0 : 1;
}