List of usage examples for org.apache.hadoop.fs.FileStatus#isDirectory()
public boolean isDirectory()
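Before the project examples below, here is a minimal, self-contained sketch of the typical pattern: list a directory with FileSystem.listStatus() and call isDirectory() on each FileStatus to tell subdirectories from plain files. The /tmp path and the println output are illustrative assumptions, not taken from any of the projects listed.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
  public static void main(String[] args) throws IOException {
    // Hypothetical path; replace with a directory that exists on your cluster
    Path dir = new Path("/tmp");
    FileSystem fs = FileSystem.get(new Configuration());

    // listStatus() returns one FileStatus per direct child of the directory
    for (FileStatus status : fs.listStatus(dir)) {
      if (status.isDirectory()) {
        System.out.println("dir:  " + status.getPath());
      } else {
        System.out.println("file: " + status.getPath());
      }
    }
  }
}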
From source file: fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java
License: LGPL
@Override
public List<DataFile> list(final DataFile file) throws IOException {

  final Path path = getPath(file);

  if (path == null) {
    throw new NullPointerException("Path to list is null");
  }

  if (this.conf == null) {
    throw new NullPointerException("The configuration object is null");
  }

  final FileSystem fs = path.getFileSystem(this.conf);

  if (fs == null) {
    throw new IOException("Unable to list the directory, the FileSystem is null");
  }

  // Check existence before asking for the status: getFileStatus() itself
  // throws FileNotFoundException for a missing path, so the custom message
  // below would never be reachable in the other order
  if (!fs.exists(path)) {
    throw new FileNotFoundException("File not found: " + file);
  }

  final FileStatus fileStatus = fs.getFileStatus(path);

  if (!fileStatus.isDirectory()) {
    throw new IOException("The file is not a directory: " + file);
  }

  // List directory
  final FileStatus[] files = fs.listStatus(path);

  // Convert the FileStatus array to a list of DataFile
  final List<DataFile> result = new ArrayList<>(files.length);
  for (FileStatus f : files) {
    result.add(new DataFile(f.getPath().toUri().toString()));
  }

  // Return an unmodifiable list
  return Collections.unmodifiableList(result);
}
From source file: fr.ens.biologie.genomique.eoulsan.data.protocols.PathDataProtocol.java
License: LGPL
@Override
public DataFileMetadata getMetadata(final DataFile src) throws IOException {

  if (!exists(src, true)) {
    throw new FileNotFoundException("File not found: " + src);
  }

  final Path path = getPath(src);
  final FileStatus status = path.getFileSystem(this.conf).getFileStatus(path);

  final SimpleDataFileMetadata result = new SimpleDataFileMetadata();
  result.setContentLength(status.getLen());
  result.setLastModified(status.getModificationTime());
  result.setDataFormat(DataFormatRegistry.getInstance().getDataFormatFromFilename(src.getName()));

  final CompressionType ct = CompressionType.getCompressionTypeByFilename(src.getSource());
  if (ct != null) {
    result.setContentEncoding(ct.getContentEncoding());
  }

  if (status.isDirectory()) {
    result.setDirectory(true);
  }

  if (status.isSymlink()) {
    result.setSymbolicLink(new DataFile(status.getSymlink().toUri()));
  }

  return result;
}
From source file: fuse4j.hadoopfs.HdfsClientReal.java
License: Apache License
/**
 * getFileInfo()
 */
public HdfsFileAttr getFileInfo(String path) {
  try {
    FileStatus dfsStat = dfs.getFileStatus(new Path(path));

    final boolean directory = dfsStat.isDirectory();
    final int inode = 0;
    final int mode = dfsStat.getPermission().toShort();
    final int uid = userCache.getUid(dfsStat.getOwner());
    final int gid = 0;

    // TODO: per-file block size can't be retrieved correctly,
    // using the default block size for now.
    final long size = dfsStat.getLen();
    final int blocks = (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

    // modification/create times are the same as the access time
    final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);

    HdfsFileAttr hdfsFileAttr = new HdfsFileAttr(directory, inode, mode, uid, gid, 1);
    hdfsFileAttr.setSize(size, blocks);
    hdfsFileAttr.setTime(modificationTime);

    // TODO: hack to set the inode
    hdfsFileAttr.inode = hdfsFileAttr.hashCode();

    return hdfsFileAttr;
  } catch (IOException ioe) {
    // fall through to failure
  }

  // failed
  return null;
}
From source file: fuse4j.hadoopfs.HdfsClientReal.java
License: Apache License
private HdfsDirEntry newHdfsDirEntry(FileStatus fileStatus) {
  final boolean directory = fileStatus.isDirectory();
  final String name = fileStatus.getPath().getName();
  final FsPermission permission = fileStatus.getPermission();

  return new HdfsDirEntry(directory, name, permission.toShort());
}
From source file: gaffer.analytic.impl.GraphStatistics.java
License: Apache License
public int run(String[] args) throws Exception {

  // Usage
  if (args.length != 6 && args.length != 7) {
    System.err.println(USAGE);
    return 1;
  }

  // Parse options
  Path outputPath = new Path(args[0]);
  String accumuloPropertiesFile = args[1];
  int numReduceTasks;
  try {
    numReduceTasks = Integer.parseInt(args[2]);
  } catch (NumberFormatException e) {
    System.err.println(USAGE);
    return 1;
  }
  Date startDate = null;
  Date endDate = null;
  boolean useTimeWindow = false;
  if (!args[3].equals("null") && !args[4].equals("null")) {
    try {
      startDate = DATE_FORMAT.parse(args[3]);
      endDate = DATE_FORMAT.parse(args[4]);
    } catch (ParseException e) {
      System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
      return 1;
    }
    useTimeWindow = true;
  }
  boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
  boolean seedsSpecified = (args.length == 7);
  String seedsFile = "";
  if (seedsSpecified) {
    seedsFile = args[6];
  }

  // Hadoop configuration
  Configuration conf = getConf();
  FileSystem fs = FileSystem.get(conf);

  // Connect to Accumulo, so we can check the connection and that the table exists
  AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
  Connector conn = Accumulo.connect(accConf);
  String tableName = accConf.getTable();
  Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

  // Check if the table exists
  if (!conn.tableOperations().exists(tableName)) {
    System.err.println("Table " + tableName + " does not exist.");
    return 1;
  }

  // Create the graph and update the configuration based on the view
  AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
  if (useTimeWindow) {
    graph.setTimeWindow(startDate, endDate);
  }
  graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
  if (seedsSpecified) {
    Set<TypeValue> typeValues = new HashSet<TypeValue>();
    BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
    String line;
    while ((line = reader.readLine()) != null) {
      String[] tokens = line.split("\\|");
      if (tokens.length != 2) {
        System.err.println("Invalid line: " + line);
        continue;
      }
      String type = tokens[0];
      String value = tokens[1];
      typeValues.add(new TypeValue(type, value));
    }
    reader.close();
    graph.setConfiguration(conf, typeValues, accConf);
  } else {
    graph.setConfiguration(conf, accConf);
  }

  // Conf
  conf.setBoolean("mapred.compress.map.output", true);
  conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

  // Job
  Job job = new Job(conf);
  job.setJarByClass(getClass());
  job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName
      + ", output = " + outputPath);

  // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that
  // creates fewer splits); otherwise use ElementInputFormat, which is based on the standard
  // AccumuloInputFormat.
  if (seedsSpecified) {
    job.setInputFormatClass(BatchScannerElementInputFormat.class);
  } else {
    job.setInputFormatClass(ElementInputFormat.class);
  }

  // Mapper
  job.setMapperClass(GraphStatisticsMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(SetOfStatistics.class);

  // Combiner
  job.setCombinerClass(GraphStatisticsReducer.class);

  // Reducer
  job.setReducerClass(GraphStatisticsReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(SetOfStatistics.class);
  job.setNumReduceTasks(numReduceTasks);

  // Output
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);

  System.out.println("Running MapReduce job over:");
  System.out.println("\tTable: " + accConf.getTable());
  System.out.println("\tUser: " + accConf.getUserName());
  System.out.println("\tAuths: " + authorizations);
  if (useTimeWindow) {
    System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate)
        + ", end time is " + DATE_FORMAT.format(endDate));
  } else {
    System.out.println("\tFilter by time is off");
  }
  System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

  // Run the job
  job.waitForCompletion(true);

  // Successful?
  if (!job.isSuccessful()) {
    System.err.println("Error running job");
    return 1;
  }

  // Write results out, skipping subdirectories and the _SUCCESS marker file
  System.out.println("Summary of graph");
  for (FileStatus file : fs.listStatus(outputPath)) {
    if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
      Text text = new Text();
      SetOfStatistics stats = new SetOfStatistics();
      while (reader.next(text, stats)) {
        System.out.println(text + ", " + stats);
      }
      reader.close();
    }
  }
  return 0;
}
From source file: gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder.java
License: Apache License
/**
 * Each subdir in {@link DatasetsFinder#inputDir} is considered a dataset if it satisfies
 * the blacklist and whitelist.
 */
@Override
public Set<Dataset> findDistinctDatasets() throws IOException {
  Set<Dataset> datasets = Sets.newHashSet();
  for (FileStatus datasetsFileStatus : this.fs.globStatus(new Path(inputDir, subDirPattern))) {
    log.info("Scanning directory : " + datasetsFileStatus.getPath().toString());
    if (datasetsFileStatus.isDirectory()) {
      String datasetName = getDatasetName(datasetsFileStatus.getPath().toString(), inputDir);
      if (DatasetFilterUtils.survived(datasetName, this.blacklist, this.whitelist)) {
        log.info("Found dataset: " + datasetName);
        Path inputPath = new Path(this.inputDir, new Path(datasetName, this.inputSubDir));
        Path inputLatePath = new Path(this.inputDir, new Path(datasetName, this.inputLateSubDir));
        Path outputPath = new Path(this.destDir, new Path(datasetName, this.destSubDir));
        Path outputLatePath = new Path(this.destDir, new Path(datasetName, this.destLateSubDir));
        Path outputTmpPath = new Path(this.tmpOutputDir, new Path(datasetName, this.destSubDir));
        double priority = this.getDatasetPriority(datasetName);

        String folderStructure = getFolderStructure();
        for (FileStatus status : this.fs.globStatus(new Path(inputPath, folderStructure))) {
          Path jobInputPath = status.getPath();
          DateTime folderTime = null;
          try {
            folderTime = getFolderTime(jobInputPath, inputPath);
          } catch (RuntimeException e) {
            log.warn("{} is not a valid folder. Will be skipped due to exception.", jobInputPath, e);
            continue;
          }

          if (folderWithinAllowedPeriod(jobInputPath, folderTime)) {
            Path jobInputLatePath = appendFolderTime(inputLatePath, folderTime);
            Path jobOutputPath = appendFolderTime(outputPath, folderTime);
            Path jobOutputLatePath = appendFolderTime(outputLatePath, folderTime);
            Path jobOutputTmpPath = appendFolderTime(outputTmpPath, folderTime);

            Dataset timeBasedDataset = new Dataset.Builder().withPriority(priority)
                .withDatasetName(datasetName)
                .addInputPath(this.recompactDatasets ? jobOutputPath : jobInputPath)
                .addInputLatePath(this.recompactDatasets ? jobOutputLatePath : jobInputLatePath)
                .withOutputPath(jobOutputPath).withOutputLatePath(jobOutputLatePath)
                .withOutputTmpPath(jobOutputTmpPath).build();

            // Stores the extra information for timeBasedDataset
            timeBasedDataset.setJobProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION,
                folderTime.toString(this.timeFormatter));
            timeBasedDataset.setJobProp(MRCompactor.COMPACTION_INPUT_PATH_TIME, folderTime.getMillis());
            datasets.add(timeBasedDataset);
          }
        }
      }
    }
  }
  return datasets;
}
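Both loops in the example above rely on FileSystem.globStatus(), which expands a glob pattern into an array of matching FileStatus entries in one call, rather than listing a single directory. Here is a minimal sketch of that pattern on its own; the dataset layout and paths are assumptions for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusExample {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());

    // Hypothetical layout: /data/<dataset>/hourly/<yyyy>/<MM>/<dd>
    FileStatus[] matches = fs.globStatus(new Path("/data/*/hourly/2024/01/15"));

    // globStatus() can return null, so guard before iterating
    if (matches != null) {
      for (FileStatus status : matches) {
        if (status.isDirectory()) {
          System.out.println("Matched folder: " + status.getPath());
        }
      }
    }
  }
}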
From source file: gobblin.compaction.hive.HdfsReader.java
License: Apache License
public static String getFirstDataFilePathInDir(String dirInHdfs) throws IOException {
  FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(dirInHdfs));
  for (FileStatus fileStatus : fileStatuses) {
    Path dataFilePath = fileStatus.getPath();
    // Skip subdirectories and hidden files such as _SUCCESS
    if (!fileStatus.isDirectory() && !dataFilePath.getName().startsWith("_")) {
      return dataFilePath.toString();
    }
  }
  String message = dirInHdfs + " does not contain a valid data file.";
  LOG.error(message);
  throw new RuntimeException(message);
}
From source file: gobblin.compaction.hive.HdfsWriter.java
License: Apache License
public static void moveSelectFiles(String extension, String source, String destination) throws IOException {
  FileSystem fs = getFileSystem();
  fs.mkdirs(new Path(destination));
  FileStatus[] fileStatuses = fs.listStatus(new Path(source));
  for (FileStatus fileStatus : fileStatuses) {
    Path path = fileStatus.getPath();
    if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) {
      // Remove any existing copy of this particular file in the destination before
      // copying; deleting the whole destination directory here would discard files
      // copied on earlier iterations of this loop
      HadoopUtils.deleteIfExists(fs, new Path(destination, path.getName()), true);
      HadoopUtils.copyPath(fs, path, fs, new Path(destination), getConfiguration());
    }
  }
}
From source file: gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner.java
License: Apache License
public static Schema getNewestSchemaFromSource(Path sourceDir, FileSystem fs) throws IOException {
  FileStatus[] files = fs.listStatus(sourceDir);
  Arrays.sort(files, new LastModifiedDescComparator());

  for (FileStatus status : files) {
    if (status.isDirectory()) {
      Schema schema = getNewestSchemaFromSource(status.getPath(), fs);
      if (schema != null) {
        return schema;
      }
    } else if (FilenameUtils.isExtension(status.getPath().getName(), AVRO)) {
      return AvroUtils.getSchemaFromDataFile(status.getPath(), fs);
    }
  }
  return null;
}
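A hypothetical call site for the recursive helper above. The source directory path is an assumption, and a null return simply means no .avro file was found anywhere under the directory tree.

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner;

public class NewestSchemaExample {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());

    // Hypothetical source directory; replace with a real Avro dataset path
    Schema newest = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(
        new Path("/data/tracking/events"), fs);

    if (newest == null) {
      // null means no Avro data file was found under the source directory
      System.err.println("No Avro schema found under the source directory");
    } else {
      System.out.println(newest.toString(true));
    }
  }
}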
From source file: gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java
License: Apache License
/**
 * Retrieves all the children of the given {@link ConfigKeyPath} by doing a {@code ls} on the
 * {@link Path} specified by the {@link ConfigKeyPath}. If the {@link Path} described by the
 * {@link ConfigKeyPath} does not exist, an empty {@link Collection} is returned.
 *
 * @param configKey the config key path whose children are necessary.
 * @param version specify the configuration version in the configuration store.
 *
 * @return a {@link Collection} of {@link ConfigKeyPath} where each entry is a child of the given configKey.
 *
 * @throws VersionDoesNotExistException if the version specified cannot be found in the {@link ConfigStore}.
 */
@Override
public Collection<ConfigKeyPath> getChildren(ConfigKeyPath configKey, String version)
    throws VersionDoesNotExistException {
  Preconditions.checkNotNull(configKey, "configKey cannot be null!");
  Preconditions.checkArgument(!Strings.isNullOrEmpty(version), "version cannot be null or empty!");

  List<ConfigKeyPath> children = new ArrayList<>();
  Path datasetDir = getDatasetDirForKey(configKey, version);

  try {
    if (!this.fs.exists(datasetDir)) {
      return children;
    }
    for (FileStatus fileStatus : this.fs.listStatus(datasetDir)) {
      if (fileStatus.isDirectory()) {
        children.add(configKey.createChild(fileStatus.getPath().getName()));
      }
    }
    return children;
  } catch (IOException e) {
    throw new RuntimeException(
        String.format("Error while getting children for configKey: \"%s\"", configKey), e);
  }
}