List of usage examples for org.apache.hadoop.hdfs.DistributedFileSystem#exists
public boolean exists(Path f) throws IOException
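The exists() call is inherited from FileSystem and returns true if the given path is present in the HDFS namespace. Below is a minimal, self-contained sketch of the call pattern shared by the examples on this page; the namenode URI and the probed path are hypothetical placeholders, not values taken from any of the source files listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical namenode address; point this at your own cluster.
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        FileSystem fs = FileSystem.get(conf);
        // exists() is defined on FileSystem; the cast just mirrors the examples below,
        // which work against DistributedFileSystem directly.
        if (fs instanceof DistributedFileSystem) {
            DistributedFileSystem dfs = (DistributedFileSystem) fs;
            Path p = new Path("/tmp/example.txt"); // hypothetical path
            System.out.println(p + (dfs.exists(p) ? " exists" : " does not exist"));
        }
        fs.close();
    }
}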
From source file:com.cloudera.impala.analysis.LoadDataStmt.java
License:Apache License
private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
    // The user must have permission to access the source location. Since the files will
    // be moved from this location, the user needs to have all permission.
    sourceDataPath_.analyze(analyzer, Privilege.ALL);
    try {
        Path source = sourceDataPath_.getPath();
        FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
        // sourceDataPath_.analyze() ensured that path is on an HDFS filesystem.
        Preconditions.checkState(fs instanceof DistributedFileSystem);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        if (!dfs.exists(source)) {
            throw new AnalysisException(
                String.format("INPATH location '%s' does not exist.", sourceDataPath_));
        }
        if (dfs.isDirectory(source)) {
            if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                throw new AnalysisException(
                    String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
            }
            if (FileSystemUtil.containsSubdirectory(source)) {
                throw new AnalysisException(
                    String.format("INPATH location '%s' cannot contain subdirectories.", sourceDataPath_));
            }
        } else {
            // INPATH points to a file.
            if (FileSystemUtil.isHiddenFile(source.getName())) {
                throw new AnalysisException(
                    String.format("INPATH location '%s' points to a hidden file.", source));
            }
        }
        String noWriteAccessErrorMsg = String.format("Unable to LOAD DATA into "
            + "target table (%s) because Impala does not have WRITE access to HDFS "
            + "location: ", hdfsTable.getFullName());
        HdfsPartition partition;
        String location;
        if (partitionSpec_ != null) {
            partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
            location = partition.getLocation();
            if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                throw new AnalysisException(noWriteAccessErrorMsg + partition.getLocation());
            }
        } else {
            // "default" partition
            partition = hdfsTable.getPartitions().get(0);
            location = hdfsTable.getLocation();
            if (!hdfsTable.hasWriteAccess()) {
                throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());
            }
        }
        Preconditions.checkNotNull(partition);
        // Until Frontend.loadTableData() can handle cross-filesystem and filesystems
        // that aren't HDFS, require that source and dest are on the same HDFS.
        if (!FileSystemUtil.isPathOnFileSystem(new Path(location), fs)) {
            throw new AnalysisException(String.format(
                "Unable to LOAD DATA into target table (%s) because source path (%s) and "
                    + "destination %s (%s) are on different file-systems.",
                hdfsTable.getFullName(), source,
                partitionSpec_ == null ? "table" : "partition", partition.getLocation()));
        }
        // Verify the files being loaded are supported.
        for (FileStatus fStatus : fs.listStatus(source)) {
            if (fs.isDirectory(fStatus.getPath())) continue;
            StringBuilder errorMsg = new StringBuilder();
            HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
            if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                throw new AnalysisException(errorMsg.toString());
            }
        }
    } catch (FileNotFoundException e) {
        throw new AnalysisException("File not found: " + e.getMessage(), e);
    } catch (IOException e) {
        throw new AnalysisException("Error accessing file system: " + e.getMessage(), e);
    }
}
From source file:com.trace.hadoop.TestDFSRename.java
License:Apache License
/**
 * Perform operations such as setting quota, deletion of files, rename and
 * ensure system can apply edits log during startup.
 */
public void testEditsLog() throws Exception {
    DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
    Path src1 = new Path(dir, "testEditsLog/srcdir/src1");
    Path dst1 = new Path(dir, "testEditsLog/dstdir/dst1");
    createFile(fs, src1);
    fs.mkdirs(dst1.getParent());
    createFile(fs, dst1);

    // Set quota so that dst1 parent cannot allow under it new files/directories
    fs.setQuota(dst1.getParent(), 2, FSConstants.QUOTA_DONT_SET);
    // Free up quota for a subsequent rename
    fs.delete(dst1, true);
    rename(src1, dst1, true, false);

    // Restart the cluster and ensure the above operations can be
    // loaded from the edits log
    restartCluster();
    fs = (DistributedFileSystem) cluster.getFileSystem();
    assertFalse(fs.exists(src1)); // ensure src1 is already renamed
    assertTrue(fs.exists(dst1));  // ensure rename dst exists
}
From source file:com.trace.hadoop.TestDFSRename.java
License:Apache License
private void rename(Path src, Path dst, boolean renameSucceeds, boolean quotaException) throws Exception {
    DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
    try {
        assertEquals(renameSucceeds, fs.rename(src, dst));
    } catch (QuotaExceededException ex) {
        assertTrue(quotaException);
    }
    assertEquals(renameSucceeds, !fs.exists(src));
    assertEquals(renameSucceeds, fs.exists(dst));
}
From source file:io.druid.indexer.HdfsClasspathSetupTest.java
License:Apache License
@Test
public void testAddSnapshotJarToClasspath() throws IOException {
    Job job = Job.getInstance(conf, "test-job");
    DistributedFileSystem fs = miniCluster.getFileSystem();
    Path intermediatePath = new Path("/tmp/classpath");
    JobHelper.addSnapshotJarToClassPath(dummyJarFile, intermediatePath, fs, job);
    Path expectedJarPath = new Path(intermediatePath, dummyJarFile.getName());
    // check file gets uploaded to HDFS
    Assert.assertTrue(fs.exists(expectedJarPath));
    // check file gets added to the classpath
    Assert.assertEquals(expectedJarPath.toString(), job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
    Assert.assertEquals(dummyJarString, StringUtils.fromUtf8(IOUtils.toByteArray(fs.open(expectedJarPath))));
}
From source file:io.druid.indexer.HdfsClasspathSetupTest.java
License:Apache License
@Test
public void testAddNonSnapshotJarToClasspath() throws IOException {
    Job job = Job.getInstance(conf, "test-job");
    DistributedFileSystem fs = miniCluster.getFileSystem();
    JobHelper.addJarToClassPath(dummyJarFile, finalClasspath, intermediatePath, fs, job);
    Path expectedJarPath = new Path(finalClasspath, dummyJarFile.getName());
    // check file gets uploaded to final HDFS path
    Assert.assertTrue(fs.exists(expectedJarPath));
    // check that the intermediate file gets deleted
    Assert.assertFalse(fs.exists(new Path(intermediatePath, dummyJarFile.getName())));
    // check file gets added to the classpath
    Assert.assertEquals(expectedJarPath.toString(), job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
    Assert.assertEquals(dummyJarString, StringUtils.fromUtf8(IOUtils.toByteArray(fs.open(expectedJarPath))));
}
From source file:io.druid.indexer.HdfsClasspathSetupTest.java
License:Apache License
@Test
public void testConcurrentUpload() throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final int concurrency = 10;
    ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrency));
    // barrier ensures that all jobs try to add files to classpath at same time.
    final CyclicBarrier barrier = new CyclicBarrier(concurrency);
    final DistributedFileSystem fs = miniCluster.getFileSystem();
    final Path expectedJarPath = new Path(finalClasspath, dummyJarFile.getName());
    List<ListenableFuture<Boolean>> futures = new ArrayList<>();

    for (int i = 0; i < concurrency; i++) {
        futures.add(pool.submit(new Callable() {
            @Override
            public Boolean call() throws Exception {
                int id = barrier.await();
                Job job = Job.getInstance(conf, "test-job-" + id);
                Path intermediatePathForJob = new Path(intermediatePath, "job-" + id);
                JobHelper.addJarToClassPath(dummyJarFile, finalClasspath, intermediatePathForJob, fs, job);
                // check file gets uploaded to final HDFS path
                Assert.assertTrue(fs.exists(expectedJarPath));
                // check that the intermediate file is not present
                Assert.assertFalse(fs.exists(new Path(intermediatePathForJob, dummyJarFile.getName())));
                // check file gets added to the classpath
                Assert.assertEquals(expectedJarPath.toString(),
                        job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
                return true;
            }
        }));
    }

    Futures.allAsList(futures).get(30, TimeUnit.SECONDS);
    pool.shutdownNow();
}
From source file:org.apache.falcon.extensions.mirroring.hdfsSnapshot.HdfsSnapshotMirroringExtension.java
License:Apache License
@Override
public void validate(final Properties extensionProperties) throws FalconException {
    for (HdfsSnapshotMirrorProperties option : HdfsSnapshotMirrorProperties.values()) {
        if (extensionProperties.getProperty(option.getName()) == null && option.isRequired()) {
            throw new FalconException("Missing extension property: " + option.getName());
        }
    }

    Cluster sourceCluster = ClusterHelper
            .getCluster(extensionProperties.getProperty(HdfsSnapshotMirrorProperties.SOURCE_CLUSTER.getName()));
    if (sourceCluster == null) {
        throw new FalconException(
                "SourceCluster entity " + HdfsSnapshotMirrorProperties.SOURCE_CLUSTER.getName() + " not found");
    }
    Cluster targetCluster = ClusterHelper
            .getCluster(extensionProperties.getProperty(HdfsSnapshotMirrorProperties.TARGET_CLUSTER.getName()));
    if (targetCluster == null) {
        throw new FalconException(
                "TargetCluster entity " + HdfsSnapshotMirrorProperties.TARGET_CLUSTER.getName() + " not found");
    }

    Configuration sourceConf = ClusterHelper.getConfiguration(sourceCluster);
    Configuration targetConf = ClusterHelper.getConfiguration(targetCluster);
    DistributedFileSystem sourceFileSystem = HadoopClientFactory.get()
            .createDistributedProxiedFileSystem(sourceConf);
    DistributedFileSystem targetFileSystem = HadoopClientFactory.get()
            .createDistributedProxiedFileSystem(targetConf);

    Path sourcePath = new Path(
            extensionProperties.getProperty(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName()));
    Path targetPath = new Path(
            extensionProperties.getProperty(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName()));

    // check if source and target path's exist and are snapshot-able
    try {
        if (sourceFileSystem.exists(sourcePath)) {
            if (!isDirSnapshotable(sourceFileSystem, sourcePath)) {
                throw new FalconException(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName() + " "
                        + sourcePath.toString() + " does not allow snapshots.");
            }
        } else {
            throw new FalconException(HdfsSnapshotMirrorProperties.SOURCE_SNAPSHOT_DIR.getName() + " "
                    + sourcePath.toString() + " does not exist.");
        }

        if (targetFileSystem.exists(targetPath)) {
            if (!isDirSnapshotable(targetFileSystem, targetPath)) {
                throw new FalconException(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName() + " "
                        + targetPath.toString() + " does not allow snapshots.");
            }
        } else {
            throw new FalconException(HdfsSnapshotMirrorProperties.TARGET_SNAPSHOT_DIR.getName() + " "
                    + targetPath.toString() + " does not exist.");
        }
    } catch (IOException e) {
        throw new FalconException(e.getMessage(), e);
    }
}
From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java
License:Apache License
/**
 * Tests that with the output to file option set, the scrutiny tool outputs invalid rows to file
 */
@Test
public void testOutputInvalidRowsToFile() throws Exception {
    insertOneValid_OneBadVal_OneMissingTarget();
    String[] argValues = getArgValues(schemaName, dataTableName, indexTableName, System.currentTimeMillis(),
            10L, SourceTable.DATA_TABLE_SOURCE, true, OutputFormat.FILE, null);
    runScrutiny(argValues);

    // check the output files
    Path outputPath = CsvBulkImportUtil.getOutputPath(new Path(outputDir), dataTableFullName);
    DistributedFileSystem fs = getUtility().getDFSCluster().getFileSystem();
    List<Path> paths = Lists.newArrayList();
    Path firstPart = null;
    for (FileStatus outputFile : fs.listStatus(outputPath)) {
        if (outputFile.getPath().getName().startsWith("part")) {
            if (firstPart == null) {
                firstPart = outputFile.getPath();
            } else {
                paths.add(outputFile.getPath());
            }
        }
    }
    if (dataTableDdl.contains("SALT_BUCKETS")) {
        fs.concat(firstPart, paths.toArray(new Path[0]));
    }
    Path outputFilePath = firstPart;
    assertTrue(fs.exists(outputFilePath));
    FSDataInputStream fsDataInputStream = fs.open(outputFilePath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(fsDataInputStream));
    TreeSet<String> lines = Sets.newTreeSet();
    try {
        String line = null;
        while ((line = reader.readLine()) != null) {
            lines.add(line);
        }
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(fsDataInputStream);
    }
    Iterator<String> lineIterator = lines.iterator();
    assertEquals("[2, name-2, " + new Timestamp(testTime).toString() + ", 95123]\t[2, name-2, "
            + new Timestamp(testTime).toString() + ", 9999]", lineIterator.next());
    assertEquals("[3, name-3, " + new Timestamp(testTime).toString() + ", 95123]\tTarget row not found",
            lineIterator.next());
}
From source file:org.apache.tajo.storage.TestByteBufLineReader.java
License:Apache License
@Test
public void testReaderWithDFS() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitClusterUp();
    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    Path tablePath = new Path("/testReaderWithDFS");
    Path filePath = new Path(tablePath, "data.dat");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();
        FSDataOutputStream out = fs.create(filePath, true);
        out.write(LINE.getBytes(Charset.defaultCharset()));
        out.write('\n');
        out.close();

        assertTrue(fs.exists(filePath));
        FSDataInputStream inputStream = fs.open(filePath);
        assertTrue(inputStream.getWrappedStream() instanceof ByteBufferReadable);

        ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
        assertEquals(LINE, lineReader.readLine());
        lineReader.seek(0);
        assertEquals(LINE, lineReader.readLine());
        assertNull(lineReader.readLine());

        lineReader.close();
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}
From source file:org.apache.tajo.storage.TestFileStorageManager.java
License:Apache License
@Test
public void testGetSplit() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    String testDataPath = TEST_PATH + "/" + UUID.randomUUID().toString();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, false);

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitClusterUp();
    TajoConf tajoConf = new TajoConf(conf);
    tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

    int testCount = 10;
    Path tablePath = new Path("/testGetSplit");
    try {
        DistributedFileSystem fs = cluster.getFileSystem();

        // Create test partitions
        List<Path> partitions = Lists.newArrayList();
        for (int i = 0; i < testCount; i++) {
            Path tmpFile = new Path(tablePath, String.valueOf(i));
            DFSTestUtil.createFile(fs, new Path(tmpFile, "tmpfile.dat"), 10, (short) 2, 0xDEADDEADl);
            partitions.add(tmpFile);
        }
        assertTrue(fs.exists(tablePath));

        FileStorageManager sm = (FileStorageManager) StorageManager.getFileStorageManager(tajoConf);
        assertEquals(fs.getUri(), sm.getFileSystem().getUri());

        Schema schema = new Schema();
        schema.addColumn("id", Type.INT4);
        schema.addColumn("age", Type.INT4);
        schema.addColumn("name", Type.TEXT);
        TableMeta meta = CatalogUtil.newTableMeta(StoreType.CSV);

        List<Fragment> splits = Lists.newArrayList();
        // Get FileFragments in partition batch
        splits.addAll(sm.getSplits("data", meta, schema, partitions.toArray(new Path[partitions.size()])));
        assertEquals(testCount, splits.size());
        // -1 is unknown volumeId
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);

        splits.clear();
        splits.addAll(sm.getSplits("data", meta, schema,
                partitions.subList(0, partitions.size() / 2).toArray(new Path[partitions.size() / 2])));
        assertEquals(testCount / 2, splits.size());
        assertEquals(1, splits.get(0).getHosts().length);
        assertEquals(-1, ((FileFragment) splits.get(0)).getDiskIds()[0]);
        fs.close();
    } finally {
        cluster.shutdown(true);
    }
}