List of usage examples for org.apache.hadoop.fs FileSystem isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
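FileSystem.isDirectory(Path) is deprecated; the Hadoop javadoc points callers at getFileStatus(Path) instead. Below is a minimal sketch of that replacement pattern (the helper class and method here are illustrative, not part of the Hadoop API); the catch clause preserves the deprecated method's behavior of returning false for a missing path.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    // Non-deprecated equivalent: fetch the FileStatus and inspect it.
    // getFileStatus() throws FileNotFoundException for a missing path,
    // while the deprecated isDirectory(Path) returns false in that case,
    // so the catch below keeps the old semantics.
    static boolean isDirectory(FileSystem fs, Path path) throws IOException {
        try {
            return fs.getFileStatus(path).isDirectory();
        } catch (FileNotFoundException e) {
            return false;
        }
    }
}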
From source file:alluxio.underfs.hdfs.acl.SupportedHdfsAclProvider.java
License:Apache License
@Override
public Pair<AccessControlList, DefaultAccessControlList> getAcl(FileSystem hdfs, String path)
        throws IOException {
    AclStatus hdfsAcl;
    Path filePath = new Path(path);
    boolean isDir = hdfs.isDirectory(filePath);
    try {
        hdfsAcl = hdfs.getAclStatus(filePath);
    } catch (AclException e) {
        // When dfs.namenode.acls.enabled is false, getAclStatus throws AclException.
        return new Pair<>(null, null);
    }

    AccessControlList acl = new AccessControlList();
    DefaultAccessControlList defaultAcl = new DefaultAccessControlList();
    acl.setOwningUser(hdfsAcl.getOwner());
    acl.setOwningGroup(hdfsAcl.getGroup());
    defaultAcl.setOwningUser(hdfsAcl.getOwner());
    defaultAcl.setOwningGroup(hdfsAcl.getGroup());

    for (AclEntry entry : hdfsAcl.getEntries()) {
        alluxio.security.authorization.AclEntry.Builder builder =
                new alluxio.security.authorization.AclEntry.Builder();
        builder.setType(getAclEntryType(entry));
        builder.setSubject(entry.getName() == null ? "" : entry.getName());
        FsAction permission = entry.getPermission();
        if (permission.implies(FsAction.READ)) {
            builder.addAction(AclAction.READ);
        } else if (permission.implies(FsAction.WRITE)) {
            builder.addAction(AclAction.WRITE);
        } else if (permission.implies(FsAction.EXECUTE)) {
            builder.addAction(AclAction.EXECUTE);
        }
        if (entry.getScope().equals(AclEntryScope.ACCESS)) {
            acl.setEntry(builder.build());
        } else {
            // default ACL, must be a directory
            defaultAcl.setEntry(builder.build());
        }
    }

    if (isDir) {
        return new Pair<>(acl, defaultAcl);
    } else {
        // a null defaultAcl indicates this is a file
        return new Pair<>(acl, null);
    }
}
From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java
/**
 * NQueens working folder structure: /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // already processed in an earlier run; do not process again
    }

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }

    int currentSolutionSet = 0;
    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;
            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize
                        + "-" + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
}
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions") @Override/*from ww w . j a v a 2 s . c o m*/ public void run(BatchActionContext context) throws Exception { if (!config.shouldRun(context)) { return; } config.substituteMacros(context); Job job = JobUtils.createInstance(); Configuration conf = job.getConfiguration(); FileSystem fileSystem = FileSystem.get(conf); Path[] paths; Path sourcePath = new Path(config.path); if (fileSystem.isDirectory(sourcePath)) { FileStatus[] status = fileSystem.listStatus(sourcePath); paths = FileUtil.stat2Paths(status); } else { paths = new Path[] { sourcePath }; } //get regex pattern for file name filtering. boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern); if (patternSpecified) { regex = Pattern.compile(config.pattern); } switch (config.action.toLowerCase()) { case "delete": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { fileSystem.delete(path, true); } } break; case "move": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { Path targetFileMovePath = new Path(config.targetFolder, path.getName()); fileSystem.rename(path, targetFileMovePath); } } break; case "archive": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { try (FSDataOutputStream archivedStream = fileSystem .create(new Path(config.targetFolder, path.getName() + ".zip")); ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream); FSDataInputStream fdDataInputStream = fileSystem.open(path)) { zipArchivedStream.putNextEntry(new ZipEntry(path.getName())); int length; byte[] buffer = new byte[1024]; while ((length = fdDataInputStream.read(buffer)) > 0) { zipArchivedStream.write(buffer, 0, length); } zipArchivedStream.closeEntry(); } fileSystem.delete(path, true); } } break; default: LOG.warn("No action required on the file."); break; } }
From source file:com.alibaba.jstorm.hdfs.spout.DirLock.java
License:Apache License
private DirLock(FileSystem fs, Path lockFile) throws IOException {
    if (fs.isDirectory(lockFile)) {
        throw new IllegalArgumentException(lockFile.toString() + " is a directory; expected a lock file");
    }
    this.fs = fs;
    this.lockFile = lockFile;
}
From source file:com.alibaba.jstorm.hdfs.spout.HdfsSpout.java
License:Apache License
private static void validateOrMakeDir(FileSystem fs, Path dir, String dirDescription) {
    try {
        if (fs.exists(dir)) {
            if (!fs.isDirectory(dir)) {
                LOG.error(dirDescription + " directory is a file, not a dir. " + dir);
                throw new RuntimeException(dirDescription + " directory is a file, not a dir. " + dir);
            }
        } else if (!fs.mkdirs(dir)) {
            LOG.error("Unable to create " + dirDescription + " directory " + dir);
            throw new RuntimeException("Unable to create " + dirDescription + " directory " + dir);
        }
    } catch (IOException e) {
        LOG.error("Unable to create " + dirDescription + " directory " + dir, e);
        throw new RuntimeException("Unable to create " + dirDescription + " directory " + dir, e);
    }
}
From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java
License:Apache License
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    // Handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");
        outputFilename = outputParts[outputParts.length - 1];
        // Remove outputFilename from outDirStr so outDirStr is just the directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    // Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: " + hdfsBlockSize);

    // Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); // see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    // Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) && hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not a directory. Exiting.");
        System.exit(1);
    }

    if (!(hdfs.exists(outDir) && hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) {
            // outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();
        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) {
        // If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    // Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    // compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; // set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    // Create empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out.println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                    + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    // Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    // Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));
    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);

    for (FileStatus thisFileStatus : inputFileList) {
        // _SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }
        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());
        dfw.appendAllFrom(dfr, false);
        dfr.close();
    }

    dfw.close();
    dfwBase.close();
}
From source file:com.awcoleman.StandaloneJava.AvroCombinerByRecord.java
License:Apache License
public AvroCombinerByRecord(String inDirStr, String outDirStr) throws IOException {

    // Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));

    FileSystem hdfs = FileSystem.get(conf);

    // Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) && hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not a directory. Exiting.");
        System.exit(1);
    }

    if (!(hdfs.exists(outDir) && hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) {
            // outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();
        if (fileStatus.isFile()) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1) {
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    // Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    dfrFirstFile.close();

    // Create empty HDFS file in output dir
    Path seedFile = new Path(outDirStr + "/combinedByRecord.avro");
    FSDataOutputStream hdfsdos = hdfs.create(seedFile, false);

    // Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    // Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));
    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);

    for (FileStatus thisFileStatus : inputFileList) {
        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long recordCounter = 0;
        while (avroStream.hasNext()) {
            dfw.append(avroStream.next());
            recordCounter++;
        }
        avroStream.close();
        inStream.close();

        System.out.println("Appended " + recordCounter + " records from " + thisFileStatus.getPath().getName()
                + " to " + seedFile.getName());
    }

    dfw.close();
    dfwBase.close();
}
From source file:com.awcoleman.StandaloneJava.AvroCounterByBlock.java
License:Apache License
public AvroCounterByBlock(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    // Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); // see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    // Check if input dir/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { // single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { // dir
        // Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();
            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not a directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {
        // _SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        dfs = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (dfs.hasNext()) {
            numAvroRecords = numAvroRecords + dfs.getBlockCount();
            thisFileRecords = thisFileRecords + dfs.getBlockCount();
            //System.out.println("Input file " + thisFileStatus.getPath() + " getBlockCount() is " + dfs.getBlockCount() + ".");
            dfs.nextBlock();
        }
        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");
        dfs.close();
        inStream.close();

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");
}
From source file:com.awcoleman.StandaloneJava.AvroCounterByRecord.java
License:Apache License
public AvroCounterByRecord(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    // Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); // see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    // Check if input dir/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { // single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { // dir
        // Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();
            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not a directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {
        // _SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (avroStream.hasNext()) {
            numAvroRecords++;
            thisFileRecords++;
            avroStream.next();
        }
        avroStream.close();
        inStream.close();

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");
}
From source file:com.cloudera.cdk.data.filesystem.FileSystemPartitionIterator.java
License:Apache License
FileSystemPartitionIterator(FileSystem fs, Path root, PartitionStrategy strategy, MarkerRange range)
        throws IOException {
    Preconditions.checkArgument(fs.isDirectory(root));
    this.fs = fs;
    this.rootDirectory = root;
    this.iterator = Iterators.filter(
            Iterators.transform(new FileSystemIterator(strategy.getFieldPartitioners().size()),
                    new MakeKey(strategy)),
            new InRange(range));
}