List of usage examples for org.apache.hadoop.fs.PathFilter
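PathFilter is a single-method interface (boolean accept(Path path)) that FileSystem.listStatus and the input formats use to decide which paths to include. Before the collected examples, here is a minimal, self-contained sketch (not taken from any of the source files below; the directory "/user/demo/output" is a placeholder) of the most common pattern: an anonymous filter that keeps only a job's part-* output files.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Keep only the MapReduce output files (part-m-*/part-r-*), skipping _SUCCESS etc.
        FileStatus[] parts = fs.listStatus(new Path("/user/demo/output"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part-");
            }
        });
        for (FileStatus status : parts) {
            System.out.println(status.getPath());
        }
    }
}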
From source file:GetRetweetersAndCountPerUser.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        // Only read the reducer output files (part-r-*) from the job's output directory.
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };
        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }
        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
From source file:HadoopUtilsTest.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        // Only match reducer output files such as part-r-00000.
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/icntv/grade/correlate-result/2013-12-12"),
                new PathFilter() {
                    @Override
                    public boolean accept(Path path) {
                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            IOUtils.copyBytes(fileSystem.open(f.getPath()), System.out, 4096, false);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}
From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java
License:Apache License
/**
 * Called after the MapReduce job has completed, to verify that the outputs
 * generated by the MapReduce job align with the expected outputs that were
 * set with calls to {@link #addExpectedOutput(String)} and
 * {@link #addExpectedOutput(String...)}.
 *
 * @return a reference to this object
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder verifyResults() throws IOException {

    FileStatus[] outputFiles = fs.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(final Path path) {
            return path.getName().startsWith("part");
        }
    });

    System.out.println("Output files: " + StringUtils.join(outputFiles));

    int i = 0;
    for (FileStatus file : outputFiles) {
        List<String> actualLines = FileUtils.readLines(fs, file.getPath());

        for (String actualLine : actualLines) {
            String expectedLine = expectedOutputs.get(i++);
            assertEquals(expectedLine, actualLine);
        }
    }

    assertEquals(expectedOutputs.size(), i);

    return this;
}
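A brief sketch of the verification flow described in the Javadoc above. Only addExpectedOutput and verifyResults are taken from this excerpt; the builder variable is assumed to have been constructed and its job run elsewhere, and the expected lines are placeholders.

// Sketch only: `builder` is an already configured TextIOJobBuilder whose job has completed;
// its construction is not shown in this excerpt.
builder.addExpectedOutput("alice\t3");
builder.addExpectedOutput("bob\t1", "carol\t2");
builder.verifyResults();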
From source file:com.architecting.ch07.MapReduceIndexerTool.java
License:Apache License
private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {

    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }

    // use alphanumeric sort (rather than lexicographical sort) to properly handle more than 99999 shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });

    return dirs;
}
From source file:com.bah.lucene.hdfs.HdfsDirectory.java
License:Apache License
@Override
public String[] listAll() throws IOException {
    LOG.debug(MessageFormat.format("listAll [{0}]", _path));
    FileStatus[] files = _fileSystem.listStatus(_path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            try {
                return _fileSystem.isFile(path);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });
    String[] result = new String[files.length];
    for (int i = 0; i < result.length; i++) {
        result[i] = files[i].getPath().getName();
    }
    return result;
}
From source file:com.bah.lucene.hdfs.SoftlinkHdfsDirectory.java
License:Apache License
@Override
public String[] listAll() throws IOException {
    LOG.debug(MessageFormat.format("listAll [{0}]", _path));
    FileStatus[] files = _fileSystem.listStatus(_path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            try {
                return _fileSystem.isFile(path);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });
    String[] result = new String[files.length];
    for (int i = 0; i < result.length; i++) {
        result[i] = removeLinkExtensionSuffix(files[i].getPath().getName());
    }
    return result;
}
From source file:com.blackberry.logtools.LogTools.java
License:Apache License
public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent,
        Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs) {
    // Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    // Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", tmp + "/final");
        pigout = tmp + "/final";
    } else {
        params.put("out", StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        // PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", tmp);

        // Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
            System.exit(1);
        }
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
        pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");

    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            // Create filter to find files with the results from PIG job
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().contains("part-");
                }
            };

            // Find the files in the directory, open and printout results
            FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
            for (int i = 0; i < status.length; i++) {
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                String line;
                line = br.readLine();
                while (line != null) {
                    System.out.println(line);
                    line = br.readLine();
                }
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.PathCombineTextInputFormat.java
License:Apache License
public synchronized static List<PathFilter> getPoll() {
    List<PathFilter> pools = new ArrayList<PathFilter>();
    Map<String, String> map = ProvUtil.getCode();
    for (Map.Entry<String, String> entry : map.entrySet()) {
        final String prov_id = entry.getValue();
        // One filter per province id: accept paths whose parent directory name ends with that id.
        pools.add(new PathFilter() {
            String provId = prov_id;

            @Override
            public boolean accept(Path path) {
                String parentDir = path.getParent().toString();
                return parentDir.endsWith(prov_id);
            }
        });
    }
    return pools;
}
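getPoll returns one PathFilter per province, while FileSystem.listStatus takes a single filter. A possible helper for combining them, not part of the source class and written here only as a sketch (it assumes the java.util and Hadoop fs imports already present in such a class), could OR the list together:

// Hypothetical helper: wrap the list returned by getPoll() into one PathFilter
// that accepts a path if any province filter accepts it.
static PathFilter anyOf(final List<PathFilter> filters) {
    return new PathFilter() {
        @Override
        public boolean accept(Path path) {
            for (PathFilter f : filters) {
                if (f.accept(path)) {
                    return true;
                }
            }
            return false;
        }
    };
}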
From source file:com.btoddb.chronicle.apps.AvroTools.java
License:Open Source License
private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);

    Path path = new Path(srcDir);
    if (!hdfsFs.exists(path)) {
        System.out.println("The path does not exist - cannot continue : " + path.toString());
        return;
    }

    FileStatus[] statuses = hdfsFs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && name.endsWith(".avro");
        }
    });

    for (FileStatus fs : statuses) {
        try {
            Path inPath = fs.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize));
            testFileAndFix(inPath);
        } catch (Exception e) {
            // don't care about the cause, the test should be able to read all files it cares about
            e.printStackTrace();
        }
    }
}
From source file:com.cloudera.cdk.data.filesystem.PathFilters.java
License:Apache License
public static PathFilter notHidden() {
    return new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !(path.getName().startsWith(".") || path.getName().startsWith("_"));
        }
    };
}
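Since listStatus accepts a PathFilter directly, the factory above can be used inline. A small usage sketch; the directory path is a placeholder:

// Placeholder directory; skips entries whose names start with "." or "_".
FileStatus[] visible = fs.listStatus(new Path("/datasets/events"), PathFilters.notHidden());
for (FileStatus status : visible) {
    System.out.println(status.getPath().getName());
}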