List of usage examples for org.apache.hadoop.mapred.FileInputFormat.setInputPathFilter
public static void setInputPathFilter(JobConf conf, Class<? extends PathFilter> filter)
From source file: edu.ub.ahstfg.kmeans.KmeansIteration.java
License:Open Source License
@Override public int run(String[] args) throws IOException, URISyntaxException { job = new JobConf(getConf()); params.toJobConf(job);//from w w w . j a v a 2 s . com job.setInt(ParamSet.N_ITERATION, nIter); LOG.info("Iteration " + nIter + " > Iniciating Kmeans iteration " + nIter); job.setJarByClass(KmeansIteration.class); // TODO may change for Clusterizer.class LOG.info("Iteration " + nIter + " > Setting input path to '" + inputPath + "'"); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileInputFormat.setInputPathFilter(job, SampleFilter.class); LOG.info("Iteration " + nIter + " > Clearing the output path at '" + outputPath + "'"); FileSystem fs = FileSystem.get(new URI(outputPath), job); if (fs.exists(new Path(outputPath))) { fs.delete(new Path(outputPath), true); } LOG.info("Iteration " + nIter + " > Setting output path to '" + outputPath + "'"); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileOutputFormat.setCompressOutput(job, false); LOG.info("Iteration " + nIter + " > Setting input format."); job.setInputFormat(IndexInputFormat.class); LOG.info("Iteration " + nIter + " > Setting output format."); job.setOutputFormat(TextOutputFormat.class); LOG.info("Iteration " + nIter + " > Setting output data types."); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); LOG.info("Iteration " + nIter + " > Setting mapper and reducer."); job.setMapperClass(KmeansMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(DocumentDistance.class); job.setReducerClass(KmeansReducer.class); LOG.info("Iteration " + nIter + " > Running job..."); boolean done = JobClient.runJob(job).isSuccessful(); LOG.info("Iteration " + nIter + " > Job done."); if (done) { return 0; } else { return 1; } }