List of usage examples for org.apache.hadoop.fs PathFilter PathFilter
PathFilter
From source file:org.apache.nifi.processors.hadoop.ListHDFS.java
License:Apache License
/**
 * Builds a {@link PathFilter} that accepts a path only when its final name component
 * matches, in full, the regular expression stored in the {@code FILE_FILTER} property.
 *
 * @param context the process context from which the {@code FILE_FILTER} property is read
 * @return a filter applying the compiled pattern to {@code path.getName()}
 */
private PathFilter createPathFilter(final ProcessContext context) {
    final String fileFilterRegex = context.getProperty(FILE_FILTER).getValue();
    // Compile once here so every accept() call reuses the same Pattern.
    final Pattern compiledFilter = Pattern.compile(fileFilterRegex);

    final PathFilter nameFilter = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            final String fileName = candidate.getName();
            return compiledFilter.matcher(fileName).matches();
        }
    };
    return nameFilter;
}
From source file:org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java
License:Apache License
private Path getOutputFile(Path outputDir, FileSystem fs) throws FileNotFoundException, IOException { final FileStatus[] files = fs.listStatus(outputDir, new PathFilter() { @Override/* w w w . ja va2 s .c o m*/ public boolean accept(Path path) { return path.getName().startsWith("part"); } }); return files[0].getPath(); //part-[m/r]-00000 }
From source file:org.apache.oozie.action.hadoop.TestSqoopActionExecutor.java
License:Apache License
/**
 * Submits the Sqoop action built from {@code getActionXmlFreeFromQuery()}, waits for the
 * launcher job to complete, and then verifies the launcher result, the action status after
 * {@code check}/{@code end}, the Hadoop counters variable, the contents of the {@code part-*}
 * output files (3 lines in total, each containing "a") and the recorded Hadoop job ids.
 *
 * @throws Exception if any step of the submission or verification fails
 */
public void testSqoopActionFreeFormQuery() throws Exception {
    createDB();

    Context context = createContext(getActionXmlFreeFromQuery());
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherMapperHelper.hasIdSwap(actionData));

    SqoopActionExecutor ae = new SqoopActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));
    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNotNull(context.getAction().getExternalChildIDs());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    String hadoopCounters = context.getVar(MapReduceActionExecutor.HADOOP_COUNTERS);
    assertNotNull(hadoopCounters);
    assertFalse(hadoopCounters.isEmpty());

    FileSystem fs = getFileSystem();
    FileStatus[] parts = fs.listStatus(new Path(getSqoopOutputDir()), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part-");
        }
    });
    int count = 0;
    for (FileStatus part : parts) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(part.getPath())));
        // Close the reader even when an assertion or a read fails part-way through the file.
        try {
            String line = br.readLine();
            while (line != null) {
                assertTrue(line.contains("a"));
                count++;
                line = br.readLine();
            }
        } finally {
            br.close();
        }
    }
    assertEquals(3, count);

    assertNotNull(context.getAction().getData());
    Properties outputData = new Properties();
    outputData.load(new StringReader(context.getAction().getData()));
    assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
    assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
}
From source file:org.apache.oozie.service.ShareLibService.java
License:Apache License
/** * Purge libs./*w w w . j a v a 2s . c o m*/ * * @param fs the fs * @param prefix the prefix * @param current the current time * @throws IOException Signals that an I/O exception has occurred. */ private void purgeLibs(FileSystem fs, final String prefix, final Date current) throws IOException { Path executorLibBasePath = services.get(WorkflowAppService.class).getSystemLibPath(); PathFilter directoryFilter = new PathFilter() { @Override public boolean accept(Path path) { if (path.getName().startsWith(prefix)) { String name = path.getName(); String time = name.substring(prefix.length()); Date d = null; try { d = dateFormat.parse(time); } catch (ParseException e) { return false; } return (current.getTime() - d.getTime()) > retentionTime; } else { return false; } } }; FileStatus[] dirList = fs.listStatus(executorLibBasePath, directoryFilter); Arrays.sort(dirList, new Comparator<FileStatus>() { // sort in desc order @Override public int compare(FileStatus o1, FileStatus o2) { return o2.getPath().getName().compareTo(o1.getPath().getName()); } }); // Logic is to keep all share-lib between current timestamp and 7days old + 1 latest sharelib older than 7 days. // refer OOZIE-1761 for (int i = 1; i < dirList.length; i++) { Path dirPath = dirList[i].getPath(); fs.delete(dirPath, true); LOG.info("Deleted old launcher jar lib directory {0}", dirPath.getName()); } }
From source file:org.apache.oozie.service.ShareLibService.java
License:Apache License
/** * Gets the Latest lib path.//from w w w . j a v a 2 s . c o m * * @param rootDir the root dir * @param prefix the prefix * @return latest lib path * @throws IOException Signals that an I/O exception has occurred. */ public Path getLatestLibPath(Path rootDir, final String prefix) throws IOException { Date max = new Date(0L); Path path = null; PathFilter directoryFilter = new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(prefix); } }; FileStatus[] files = fs.listStatus(rootDir, directoryFilter); for (FileStatus file : files) { String name = file.getPath().getName().toString(); String time = name.substring(prefix.length()); Date d = null; try { d = dateFormat.parse(time); } catch (ParseException e) { continue; } if (d.compareTo(max) > 0) { path = file.getPath(); max = d; } } // If there are no timestamped directories, fall back to root directory if (path == null) { path = rootDir; } return path; }
From source file:org.apache.pig.test.TestEvalPipeline2.java
License:Apache License
@SuppressWarnings("unchecked") @Test/*from ww w. j av a 2 s.c o m*/ public void testCrossAfterGroupAll() throws Exception { String[] input = { "1\tA", "2\tB", "3\tC", "4\tD", }; Util.createInputFile(cluster, "table_testCrossAfterGroupAll", input); try { pigServer.getPigContext().getProperties().setProperty("pig.exec.reducers.bytes.per.reducer", "40"); pigServer.registerQuery("A = load 'table_testCrossAfterGroupAll' as (a0:int, a1:chararray);"); pigServer.registerQuery("B = group A all;"); pigServer.registerQuery("C = foreach B generate COUNT(A);"); pigServer.registerQuery("D = cross A, C;"); Path output = FileLocalizer.getTemporaryPath(pigServer.getPigContext()); ExecJob job = pigServer.store("D", output.toString()); FileSystem fs = output.getFileSystem(cluster.getConfiguration()); FileStatus[] partFiles = fs.listStatus(output, new PathFilter() { @Override public boolean accept(Path path) { if (path.getName().startsWith("part")) { return true; } return false; } }); // auto-parallelism is 2 in MR, 20 in Tez, so check >=2 Assert.assertTrue(partFiles.length >= 2); // Check the output Iterator<Tuple> iter = job.getResults(); List<Tuple> results = new ArrayList<Tuple>(); while (iter.hasNext()) { results.add(iter.next()); } Collections.sort(results); Assert.assertEquals(4, results.size()); Assert.assertEquals("(1,A,4)", results.get(0).toString()); Assert.assertEquals("(2,B,4)", results.get(1).toString()); Assert.assertEquals("(3,C,4)", results.get(2).toString()); Assert.assertEquals("(4,D,4)", results.get(3).toString()); } finally { pigServer.getPigContext().getProperties().remove("pig.exec.reducers.bytes.per.reducer"); } }
From source file:org.apache.pig.test.Util.java
License:Apache License
/**
 * Reads every line of the given file, or of every file directly inside the given directory
 * whose name does not start with {@code "_"}, decoding the content as UTF-8.
 *
 * @param fs the file system to read from
 * @param fileName the file or directory to read; on Windows, backslashes are converted to
 *        forward slashes before the path is built
 * @return all lines read, in listing order
 * @throws IOException if the path does not exist or reading fails
 */
public static String[] readOutput(FileSystem fs, String fileName) throws IOException {
    if (Util.WINDOWS) {
        fileName = fileName.replace('\\', '/');
    }
    Path path = new Path(fileName);
    if (!fs.exists(path)) {
        throw new IOException("Path " + fileName + " does not exist on the FileSystem");
    }

    FileStatus fileStatus = fs.getFileStatus(path);
    FileStatus[] files;
    if (fileStatus.isDir()) {
        files = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return !p.getName().startsWith("_");
            }
        });
    } else {
        files = new FileStatus[] { fileStatus };
    }

    List<String> result = new ArrayList<String>();
    for (FileStatus f : files) {
        FSDataInputStream stream = fs.open(f.getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        // Close the reader (and the stream it wraps) even if a read fails mid-file.
        try {
            String line;
            while ((line = br.readLine()) != null) {
                result.add(line);
            }
        } finally {
            br.close();
        }
    }
    return result.toArray(new String[result.size()]);
}
From source file:org.apache.pig.test.Util.java
License:Apache License
/**
 * Creates a {@link PathFilter} that rejects every path whose name begins with an
 * underscore and accepts all others.
 *
 * @return a filter that excludes names starting with {@code "_"}
 */
public static PathFilter getSuccessMarkerPathFilter() {
    PathFilter skipUnderscoreNames = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            boolean underscorePrefixed = candidate.getName().startsWith("_");
            return !underscorePrefixed;
        }
    };
    return skipUnderscoreNames;
}
From source file:org.apache.pig.tez.TestTezAutoParallelism.java
License:Apache License
@Test public void testGroupBy() throws IOException { // parallelism is 3 originally, reduce to 1 pigServer.getPigContext().getProperties().setProperty(PigConfiguration.PIG_NO_SPLIT_COMBINATION, "true"); pigServer.getPigContext().getProperties().setProperty(MRConfiguration.MAX_SPLIT_SIZE, "3000"); pigServer.getPigContext().getProperties().setProperty(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM, Long.toString(InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER)); pigServer.registerQuery("A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);"); pigServer.registerQuery("B = group A by name;"); pigServer.store("B", "output1"); FileSystem fs = cluster.getFileSystem(); FileStatus[] files = fs.listStatus(new Path("output1"), new PathFilter() { public boolean accept(Path path) { if (path.getName().startsWith("part")) { return true; }//from w w w . jav a 2s.com return false; } }); assertEquals(files.length, 1); }
From source file:org.apache.pig.tez.TestTezAutoParallelism.java
License:Apache License
@Test public void testOrderbyDecreaseParallelism() throws IOException { // order by parallelism is 3 originally, reduce to 1 pigServer.getPigContext().getProperties().setProperty(PigConfiguration.PIG_NO_SPLIT_COMBINATION, "true"); pigServer.getPigContext().getProperties().setProperty(MRConfiguration.MAX_SPLIT_SIZE, "3000"); pigServer.getPigContext().getProperties().setProperty(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM, Long.toString(InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER)); pigServer.registerQuery("A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);"); pigServer.registerQuery("B = group A by name parallel 3;"); pigServer.registerQuery("C = foreach B generate group as name, AVG(A.age) as age;"); pigServer.registerQuery("D = order C by age;"); pigServer.store("D", "output2"); FileSystem fs = cluster.getFileSystem(); FileStatus[] files = fs.listStatus(new Path("output2"), new PathFilter() { public boolean accept(Path path) { if (path.getName().startsWith("part")) { return true; }//from www . j a va 2 s . c o m return false; } }); assertEquals(files.length, 1); }