List of usage examples for org.apache.hadoop.fs FileStatus isDirectory
public boolean isDirectory()
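Before the per-project excerpts, a minimal sketch of the call in isolation. This is not taken from any project below; the path and class name are illustrative, and it assumes a default-configured FileSystem.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // getFileStatus() throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(new Path("/tmp/example")); // illustrative path
        if (status.isDirectory()) {
            System.out.println(status.getPath() + " is a directory");
        } else {
            System.out.println(status.getPath() + " is a file of " + status.getLen() + " bytes");
        }
    }
}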
From source file:com.uber.hoodie.common.table.timeline.dto.FileStatusDTO.java
License:Apache License
public static FileStatusDTO fromFileStatus(FileStatus fileStatus) {
    if (null == fileStatus) {
        return null;
    }

    FileStatusDTO dto = new FileStatusDTO();
    try {
        dto.path = FilePathDTO.fromPath(fileStatus.getPath());
        dto.length = fileStatus.getLen();
        dto.isdir = fileStatus.isDirectory();
        dto.blockReplication = fileStatus.getReplication();
        dto.blocksize = fileStatus.getBlockSize();
        dto.modificationTime = fileStatus.getModificationTime();
        dto.accessTime = fileStatus.getAccessTime();
        dto.symlink = fileStatus.isSymlink() ? FilePathDTO.fromPath(fileStatus.getSymlink()) : null;
        safeReadAndSetMetadata(dto, fileStatus);
    } catch (IOException ioe) {
        throw new HoodieException(ioe);
    }
    return dto;
}
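A hedged usage sketch for the conversion above; the table path is illustrative and not taken from the Hudi sources:

FileSystem fs = FileSystem.get(new Configuration());
FileStatus status = fs.getFileStatus(new Path("/tmp/hoodie_table/.hoodie")); // illustrative path
FileStatusDTO dto = FileStatusDTO.fromFileStatus(status); // dto.isdir mirrors status.isDirectory()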
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/**
 * Add files in the input path recursively into the results.
 * @param result the List to store all files
 * @param fs the FileSystem
 * @param path the input path
 * @param inputFilter the input filter that can be used to filter files/dirs
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    for (FileStatus stat : fs.listStatus(path, inputFilter)) {
        if (stat.isDirectory()) {
            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
        } else {
            result.add(stat);
        }
    }
}
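A hedged sketch of driving this recursive walker (assumes a configured FileSystem; the root path is illustrative, and the filter here simply accepts everything):

List<FileStatus> result = new ArrayList<FileStatus>();
FileSystem fs = FileSystem.get(new Configuration());
PathFilter acceptAll = path -> true; // hypothetical filter: accept all paths
addInputPathRecursively(result, fs, new Path("/data/input"), acceptAll); // illustrative root
// result now holds every non-directory FileStatus under /data/input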
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);

    // Whether we need to recursively look into the directory structure
    boolean recursive = job.getBoolean("mapred.input.dir.recursive", false);

    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user-provided one (if any)
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        if (recursive && stat.isDirectory()) {
                            addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                        } else {
                            result.add(stat);
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result.toArray(new FileStatus[result.size()]);
}
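Note that the recursion above is opt-in via the mapred.input.dir.recursive key. A hedged sketch of enabling it on a JobConf (the input path is illustrative; the key is the one read in the listing above):

JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/data/in")); // illustrative input path
job.setBoolean("mapred.input.dir.recursive", true); // same key the listing reads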
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too big.
 */
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);

    // compute total size, checking that each entry is a valid file
    long totalSize = 0;
    for (FileStatus file : files) {
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(
            job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1),
            minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(goalSize, minSize, blockSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize,
                        clusterMap);
                splits.add(makeSplit(path, length - bytesRemaining, splitSize, splitHosts));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, bytesRemaining,
                        clusterMap);
                splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts));
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            splits.add(makeSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero-length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
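For orientation, computeSplitSize in Hadoop's FileInputFormat resolves to Math.max(minSize, Math.min(goalSize, blockSize)). A worked illustration of the values involved (the numbers are hypothetical):

long goalSize = 200L << 20;  // totalSize / numSplits, e.g. 200 MB
long minSize = 1L;           // SPLIT_MINSIZE default
long blockSize = 128L << 20; // HDFS block size, e.g. 128 MB
long splitSize = Math.max(minSize, Math.min(goalSize, blockSize)); // 128 MB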
From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java
License:Apache License
@Test
public void testCurlCreateBucket1()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("rewrite", null, "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT",
            "http://" + hostName + ":" + PROXY_PORT + "/rewrite?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.err.println(out);
    System.err.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(retVal.getPath());
    assertEquals("rewrite", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);
}
From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java
License:Apache License
@Test
public void testCurlCreateBucket2()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("rewrite", null, "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT", "-H",
            "Host: rewrite." + hostName,
            "http://" + hostName + ":" + PROXY_PORT + "/?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(retVal.getPath());
    assertEquals("rewrite", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);
}
From source file:com.wandisco.s3hdfs.rewrite.filter.TestCurlCommands.java
License:Apache License
@Test
public void testCurlGetNonExistantObject()
        throws IOException, URISyntaxException, S3ServiceException, InterruptedException {
    // WITHOUT BUCKET
    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("myBucket",
            "S3HDFS%2Fslot%2D01special%2Dtapestart", "flavaflav");

    ProcessBuilder pb = new ProcessBuilder("curl", "-v", "-L", "-X", "GET",
            "http://" + hostName + ":" + PROXY_PORT + "/myBucket/" + s3HdfsPath.getObjectName()
                    + "?user.name=flavaflav");
    Process proc = pb.start();
    proc.waitFor();

    String out = testUtil.readInputStream(proc.getInputStream());
    String out2 = testUtil.readInputStream(proc.getErrorStream());
    System.out.println(out);
    System.out.println(out2);
    assert out2.contains("HTTP/1.1 404 Not Found");

    // MAKE BUCKET
    ProcessBuilder pb2 = new ProcessBuilder("curl", "-v", "-L", "-X", "PUT", "-H",
            "Host: myBucket." + hostName,
            "http://" + hostName + ":" + PROXY_PORT + "/?user.name=flavaflav");
    Process proc2 = pb2.start();
    proc2.waitFor();

    out = testUtil.readInputStream(proc2.getInputStream());
    out2 = testUtil.readInputStream(proc2.getErrorStream());
    System.out.println(out);
    System.out.println(out2);

    FileStatus retVal = hdfs.listStatus(new Path(s3HdfsPath.getHdfsRootUserPath()))[0];
    System.out.println(retVal.getPath());
    assertEquals("myBucket", retVal.getPath().getName());
    assertEquals("flavaflav", retVal.getOwner());
    assertTrue(retVal.isDirectory());

    FileStatus[] inside = hdfs.listStatus(new Path(retVal.getPath().toString()));
    assertEquals(0, inside.length);

    // WITH BUCKET
    ProcessBuilder pb3 = new ProcessBuilder("curl", "-v", "-L", "-X", "GET",
            "http://" + hostName + ":" + PROXY_PORT + "/myBucket/" + s3HdfsPath.getObjectName()
                    + "?user.name=flavaflav");
    Process proc3 = pb3.start();
    proc3.waitFor();

    out = testUtil.readInputStream(proc3.getInputStream());
    out2 = testUtil.readInputStream(proc3.getErrorStream());
    System.out.println("LAST: " + out);
    System.out.println("LAST: " + out2);
    assert out2.contains("HTTP/1.1 404 Not Found");
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java
License:Apache License
/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist; isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java
License:Apache License
private void create() throws IOException {
    if (!fs.exists(directory)) {
        fs.mkdirs(directory);
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist; isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }

    // clear old index files
    FileStatus[] fileStatus = fs.listStatus(directory);
    for (int i = 0; i < fileStatus.length; i++) {
        if (!fs.delete(fileStatus[i].getPath(), true)) {
            throw new IOException("Cannot delete index file " + fileStatus[i].getPath());
        }
    }
}
From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java
License:Open Source License
public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // globStatus() returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }
}
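Classes like this are typically consumed through java.io.SequenceInputStream, which concatenates the per-part streams. A hedged sketch, assuming the class implements Enumeration<InputStream> (its name suggests so, but the excerpt does not show the interface), with an illustrative output path:

FileSystem fs = FileSystem.get(new Configuration());
Enumeration<InputStream> parts =
        new MapReducePartInputStreamEnumeration(fs, new Path("/user/out")); // illustrative path
InputStream concatenated = new SequenceInputStream(parts); // reads the part files back to back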