Usage examples for org.apache.mahout.utils.SplitInputJob.run
@SuppressWarnings("rawtypes") public static void run(Configuration initialConf, Path inputPath, Path outputPath, int keepPct, float randomSelectionPercent) throws IOException, ClassNotFoundException, InterruptedException
From source file: com.netease.news.utils.SplitInput.java
License:Apache License
public void splitDirectory(Configuration conf, Path inputDir) throws IOException, ClassNotFoundException, InterruptedException { FileSystem fs = inputDir.getFileSystem(conf); if (fs.getFileStatus(inputDir) == null) { throw new IOException(inputDir + " does not exist"); }//from ww w.ja va 2 s . c om if (!fs.getFileStatus(inputDir).isDir()) { throw new IOException(inputDir + " is not a directory"); } if (useMapRed) { SplitInputJob.run(conf, inputDir, mapRedOutputDirectory, keepPct, testRandomSelectionPct); } else { // input dir contains one file per category. FileStatus[] fileStats = fs.listStatus(inputDir, PathFilters.logsCRCFilter()); for (FileStatus inputFile : fileStats) { if (!inputFile.isDir()) { splitFile(inputFile.getPath()); } } } }