List of usage examples for org.apache.hadoop.fs PathFilter PathFilter
PathFilter
From source file:org.apache.solr.update.HdfsUpdateLog.java
License:Apache License
public static String[] getLogList(FileSystem fs, Path tlogDir) { final String prefix = TLOG_NAME + '.'; assert fs != null; FileStatus[] fileStatuses;// w w w . j a va 2 s .c o m try { fileStatuses = fs.listStatus(tlogDir, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(prefix); } }); } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } String[] names = new String[fileStatuses.length]; for (int i = 0; i < fileStatuses.length; i++) { names[i] = fileStatuses[i].getPath().getName(); } Arrays.sort(names); return names; }
From source file:org.apache.solr.update.HdfsUpdateLog.java
License:Apache License
/**
 * Returns the names of the entries under {@code tlogDir} whose file name starts with
 * {@code TLOG_NAME + '.'}, in the order in which the file system listing reported them.
 *
 * @param tlogDir the directory to list
 * @return the last path component of every matching entry
 * @throws FileNotFoundException propagated from the listing call
 * @throws IOException propagated from the listing call
 */
private String[] getLogList(Path tlogDir) throws FileNotFoundException, IOException {
  final String tlogPrefix = TLOG_NAME + '.';

  PathFilter tlogsOnly = new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      return candidate.getName().startsWith(tlogPrefix);
    }
  };
  FileStatus[] matches = fs.listStatus(tlogDir, tlogsOnly);

  // Copy out just the file names, keeping the listing order.
  String[] names = new String[matches.length];
  for (int idx = 0; idx < matches.length; idx++) {
    names[idx] = matches[idx].getPath().getName();
  }
  return names;
}
From source file:org.apache.sqoop.avro.AvroUtil.java
License:Apache License
/** * Get the schema of AVRO files stored in a directory */// w ww .j a va 2 s. com public static Schema getAvroSchema(Path path, Configuration conf) throws IOException { FileSystem fs = path.getFileSystem(conf); Path fileToTest; if (fs.isDirectory(path)) { FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() { @Override public boolean accept(Path p) { String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); } }); if (fileStatuses.length == 0) { return null; } fileToTest = fileStatuses[0].getPath(); } else { fileToTest = path; } SeekableInput input = new FsInput(fileToTest, conf); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); Schema result = fileReader.getSchema(); fileReader.close(); return result; }
From source file:org.apache.tajo.engine.function.FunctionLoader.java
License:Apache License
/** * Load functions that are defined by users. * * @param conf/* www. j a v a2 s . c o m*/ * @param functionMap * @return * @throws IOException */ public static Map<FunctionSignature, FunctionDesc> loadUserDefinedFunctions(TajoConf conf, Map<FunctionSignature, FunctionDesc> functionMap) throws IOException { String[] codePaths = conf.getStrings(TajoConf.ConfVars.PYTHON_CODE_DIR.varname); if (codePaths != null) { FileSystem localFS = FileSystem.getLocal(conf); for (String codePathStr : codePaths) { Path codePath = new Path(codePathStr); List<Path> filePaths = TUtil.newList(); if (localFS.isDirectory(codePath)) { for (FileStatus file : localFS.listStatus(codePath, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().endsWith(PythonScriptEngine.FILE_EXTENSION); } })) { filePaths.add(file.getPath()); } } else { filePaths.add(codePath); } for (Path filePath : filePaths) { for (FunctionDesc f : PythonScriptEngine.registerFunctions(filePath.toUri(), FunctionLoader.PYTHON_FUNCTION_NAMESPACE)) { functionMap.put(f.getSignature(), f); } } } } return functionMap; }
From source file:org.apache.tajo.engine.query.ResultSetImpl.java
License:Apache License
private Collection<Fragment> getFragmentsNG(TableMeta meta, Path tablePath) throws IOException { List<Fragment> fraglist = Lists.newArrayList(); FileStatus[] files = fs.listStatus(tablePath, new PathFilter() { @Override//from w w w. j av a 2s .com public boolean accept(Path path) { return path.getName().charAt(0) != '.'; } }); Arrays.sort(files, new FileNameComparator()); String tbname = tablePath.getName(); for (int i = 0; i < files.length; i++) { if (files[i].getLen() == 0) { continue; } fraglist.add(new Fragment(tbname + "_" + i, files[i].getPath(), meta, 0l, files[i].getLen(), null)); } return fraglist; }
From source file:org.apache.tajo.jdbc.TajoResultSet.java
License:Apache License
private List<Fragment> getFragments(Path tablePath) throws IOException { List<Fragment> fragments = Lists.newArrayList(); FileStatus[] files = fs.listStatus(tablePath, new PathFilter() { @Override/*from ww w .ja v a 2 s .c o m*/ public boolean accept(Path path) { return path.getName().charAt(0) != '.'; } }); // The files must be sorted in an ascending order of file names // in order to guarantee the order of a sort operation. // This is because our distributed sort algorithm outputs // a sequence of sorted data files, each of which contains sorted rows // within each file. Arrays.sort(files, new FileNameComparator()); String tbname = tablePath.getName(); for (int i = 0; i < files.length; i++) { if (files[i].getLen() == 0) { continue; } fragments.add(new FileFragment(tbname + "_" + i, files[i].getPath(), 0l, files[i].getLen())); } return ImmutableList.copyOf(fragments); }
From source file:org.apache.tez.test.TestRecovery.java
License:Apache License
/**
 * Runs {@code HashJoinExample} on the mini Tez cluster with the recovery hook configured to
 * shut the AM down at {@code shutdownCondition}, then verifies the join output and that the
 * last recovery event recorded for attempt 1 matches the shutdown condition.
 *
 * <p>Input 1 holds "term0".."term19", input 2 holds only the even-numbered terms, so the
 * expected output is exactly the even-numbered terms.
 *
 * @param shutdownCondition the condition at which the recovery hook triggers shutdown
 * @param enableAutoParallelism value for the shuffle vertex manager auto-parallel setting
 * @param generateSplitInClient whether to pass {@code -generateSplitInClient} to the example
 * @throws Exception on any failure
 */
private void testHashJoinExample(SimpleShutdownCondition shutdownCondition, boolean enableAutoParallelism,
    boolean generateSplitInClient) throws Exception {
  // --- configuration ---
  TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
  tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
  tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS,
      RecoveryServiceWithEventHandlingHook.class.getName());
  tezConf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS,
      SimpleRecoveryEventHook.class.getName());
  tezConf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION, shutdownCondition.serialize());
  tezConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
      enableAutoParallelism);
  tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
  tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
  tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");

  HashJoinExample hashJoinExample = new HashJoinExample();
  hashJoinExample.setConf(tezConf);

  // --- directories ---
  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path inPath1 = new Path("/tmp/hashJoin/inPath1");
  Path inPath2 = new Path("/tmp/hashJoin/inPath2");
  Path outPath = new Path("/tmp/hashJoin/outPath");
  remoteFs.delete(outPath, true);
  remoteFs.mkdirs(inPath1);
  remoteFs.mkdirs(inPath2);
  remoteFs.mkdirs(stagingDirPath);

  // --- input data ---
  Set<String> expectedResult = new HashSet<String>();
  FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file"));
  FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file"));
  BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
  BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2));
  for (int n = 0; n < 20; n++) {
    String term = "term" + n;
    writer1.write(term);
    writer1.newLine();
    boolean inBothInputs = (n % 2 == 0);
    if (inBothInputs) {
      writer2.write(term);
      writer2.newLine();
      expectedResult.add(term);
    }
  }
  writer1.close();
  writer2.close();
  out1.close();
  out2.close();

  // --- run ---
  String stagingOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args = generateSplitInClient
      ? new String[] { stagingOption, "-generateSplitInClient", inPath1.toString(), inPath2.toString(), "1",
          outPath.toString() }
      : new String[] { stagingOption, inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
  assertEquals(0, hashJoinExample.run(args));

  // --- verify output: exactly one visible file containing each expected term once ---
  FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() {
    public boolean accept(Path p) {
      String name = p.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }
  });
  assertEquals(1, statuses.length);
  FSDataInputStream inStream = remoteFs.open(statuses[0].getPath());
  BufferedReader reader = new BufferedReader(new InputStreamReader(inStream));
  for (String line = reader.readLine(); line != null; line = reader.readLine()) {
    assertTrue(expectedResult.remove(line));
  }
  reader.close();
  inStream.close();
  assertEquals(0, expectedResult.size());

  // --- verify recovery: the first attempt must have ended on the shutdown event ---
  List<HistoryEvent> historyEventsOfAttempt1 = RecoveryParser.readRecoveryEvents(tezConf,
      hashJoinExample.getAppId(), 1);
  int lastIndex = historyEventsOfAttempt1.size() - 1;
  HistoryEvent lastEvent = historyEventsOfAttempt1.get(lastIndex);
  assertEquals(shutdownCondition.getEvent().getEventType(), lastEvent.getEventType());
  assertTrue(shutdownCondition.match(lastEvent));
}
From source file:org.apache.tez.test.TestTezJobs.java
License:Apache License
/**
 * Runs {@code HashJoinExample} against the remote file system and checks that the single
 * visible output file contains exactly the terms present in both inputs.
 *
 * <p>Input 1 holds "term0".."term19"; input 2 holds only the even-numbered terms.
 */
@Test(timeout = 60000)
public void testHashJoinExample() throws Exception {
  HashJoinExample hashJoinExample = new HashJoinExample();
  hashJoinExample.setConf(mrrTezCluster.getConfig());

  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path inPath1 = new Path("/tmp/hashJoin/inPath1");
  Path inPath2 = new Path("/tmp/hashJoin/inPath2");
  Path outPath = new Path("/tmp/hashJoin/outPath");
  remoteFs.mkdirs(inPath1);
  remoteFs.mkdirs(inPath2);
  remoteFs.mkdirs(stagingDirPath);

  // Write the two inputs and remember which terms the join should emit.
  Set<String> expectedResult = new HashSet<String>();
  FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file"));
  FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file"));
  BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
  BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2));
  for (int n = 0; n < 20; n++) {
    String term = "term" + n;
    writer1.write(term);
    writer1.newLine();
    boolean inBothInputs = (n % 2 == 0);
    if (inBothInputs) {
      writer2.write(term);
      writer2.newLine();
      expectedResult.add(term);
    }
  }
  writer1.close();
  writer2.close();
  out1.close();
  out2.close();

  String stagingOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args = new String[] { stagingOption, inPath1.toString(), inPath2.toString(), "1",
      outPath.toString() };
  assertEquals(0, hashJoinExample.run(args));

  // Ignore bookkeeping entries (names starting with "_" or ".").
  FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() {
    public boolean accept(Path p) {
      String name = p.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }
  });
  assertEquals(1, statuses.length);

  // Every output line must be an expected term not seen before; none may be left over.
  FSDataInputStream inStream = remoteFs.open(statuses[0].getPath());
  BufferedReader reader = new BufferedReader(new InputStreamReader(inStream));
  for (String line = reader.readLine(); line != null; line = reader.readLine()) {
    assertTrue(expectedResult.remove(line));
  }
  reader.close();
  inStream.close();
  assertEquals(0, expectedResult.size());
}
From source file:org.apache.tez.test.TestTezJobs.java
License:Apache License
@Test(timeout = 60000) public void testHashJoinExampleDisableSplitGrouping() throws Exception { HashJoinExample hashJoinExample = new HashJoinExample(); hashJoinExample.setConf(conf);// ww w. j ava2 s. co m Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir"); Path inPath1 = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath1"); Path inPath2 = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath2"); Path outPath = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/outPath"); localFs.delete(outPath, true); localFs.mkdirs(inPath1); localFs.mkdirs(inPath2); localFs.mkdirs(stagingDirPath); Set<String> expectedResult = new HashSet<String>(); FSDataOutputStream out1 = localFs.create(new Path(inPath1, "file")); FSDataOutputStream out2 = localFs.create(new Path(inPath2, "file")); BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1)); BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2)); for (int i = 0; i < 20; i++) { String term = "term" + i; writer1.write(term); writer1.newLine(); if (i % 2 == 0) { writer2.write(term); writer2.newLine(); expectedResult.add(term); } } writer1.close(); writer2.close(); out1.close(); out2.close(); String[] args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), "-local", "-disableSplitGrouping", inPath1.toString(), inPath2.toString(), "1", outPath.toString() }; assertEquals(0, hashJoinExample.run(args)); FileStatus[] statuses = localFs.listStatus(outPath, new PathFilter() { public boolean accept(Path p) { String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); } }); assertEquals(1, statuses.length); FSDataInputStream inStream = localFs.open(statuses[0].getPath()); BufferedReader reader = new BufferedReader(new InputStreamReader(inStream)); String line; while ((line = reader.readLine()) != null) { assertTrue(expectedResult.remove(line)); } reader.close(); inStream.close(); assertEquals(0, expectedResult.size()); }
From source file:org.apache.tez.test.TestTezJobs.java
License:Apache License
@Test(timeout = 60000) public void testSortMergeJoinExample() throws Exception { SortMergeJoinExample sortMergeJoinExample = new SortMergeJoinExample(); sortMergeJoinExample.setConf(new Configuration(mrrTezCluster.getConfig())); Path stagingDirPath = new Path("/tmp/tez-staging-dir"); Path inPath1 = new Path("/tmp/sortMerge/inPath1"); Path inPath2 = new Path("/tmp/sortMerge/inPath2"); Path outPath = new Path("/tmp/sortMerge/outPath"); remoteFs.mkdirs(inPath1);//w w w . ja va 2s .c om remoteFs.mkdirs(inPath2); remoteFs.mkdirs(stagingDirPath); Set<String> expectedResult = new HashSet<String>(); FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file")); FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file")); BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1)); BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2)); for (int i = 0; i < 20; i++) { String term = "term" + i; writer1.write(term); writer1.newLine(); if (i % 2 == 0) { writer2.write(term); writer2.newLine(); expectedResult.add(term); } } writer1.close(); writer2.close(); out1.close(); out2.close(); String[] args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), inPath1.toString(), inPath2.toString(), "1", outPath.toString() }; assertEquals(0, sortMergeJoinExample.run(args)); FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() { public boolean accept(Path p) { String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); } }); assertEquals(1, statuses.length); FSDataInputStream inStream = remoteFs.open(statuses[0].getPath()); BufferedReader reader = new BufferedReader(new InputStreamReader(inStream)); String line; while ((line = reader.readLine()) != null) { assertTrue(expectedResult.remove(line)); } reader.close(); inStream.close(); assertEquals(0, expectedResult.size()); }