List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:cc.solr.lucene.store.hdfs.HdfsDirectory.java
License:Apache License
@Override public String[] listAll() throws IOException { FileStatus[] listStatus = getFileSystem().listStatus(_hdfsDirPath); List<String> files = new ArrayList<String>(); if (listStatus == null) { return new String[] {}; }// w ww.java 2 s . c om for (FileStatus status : listStatus) { if (!status.isDir()) { files.add(status.getPath().getName()); } } return getNormalNames(files); }
From source file:ch.cern.db.hdfs.Main.java
License:GNU General Public License
private void printFileStatus(FileStatus status) { System.out.println();//www .j a va 2 s.co m System.out.println("Showing metadata for: " + status.getPath()); System.out.println(" isDirectory: " + status.isDirectory()); System.out.println(" isFile: " + status.isFile()); System.out.println(" isSymlink: " + status.isSymlink()); System.out.println(" encrypted: " + status.isEncrypted()); System.out.println(" length: " + status.getLen()); System.out.println(" replication: " + status.getReplication()); System.out.println(" blocksize: " + status.getBlockSize()); System.out.println(" modification_time: " + new Date(status.getModificationTime())); System.out.println(" access_time: " + new Date(status.getAccessTime())); System.out.println(" owner: " + status.getOwner()); System.out.println(" group: " + status.getGroup()); System.out.println(" permission: " + status.getPermission()); System.out.println(); }
From source file:ch.ethz.las.wikimining.mr.io.IntegerSequenceFileReader.java
/**
 * Reads every record of the sequence file behind {@code status} and stores each key/value
 * pair in {@code map}.
 *
 * <p>Key and value holders are instantiated from the classes reported by the reader and are
 * cast to {@link IntWritable}.
 *
 * @param status the file to read
 * @throws IOException if opening or reading the sequence file fails
 */
@Override
protected void readContent(FileStatus status) throws IOException {
    try (SequenceFile.Reader sequenceReader = new SequenceFile.Reader(fs, status.getPath(), config)) {
        final IntWritable currentKey =
                (IntWritable) ReflectionUtils.newInstance(sequenceReader.getKeyClass(), config);
        final IntWritable currentValue =
                (IntWritable) ReflectionUtils.newInstance(sequenceReader.getValueClass(), config);

        // The same two holders are refilled by each successful next() call.
        for (boolean hasRecord = sequenceReader.next(currentKey, currentValue);
                hasRecord;
                hasRecord = sequenceReader.next(currentKey, currentValue)) {
            map.put(currentKey.get(), currentValue.get());
        }
    }
}
From source file:ch.ethz.las.wikimining.mr.io.SequenceFileReader.java
/**
 * Reads the content of every entry directly under {@code path} into {@code map}, ignoring
 * directories and the entry named {@code _SUCCESS}.
 *
 * @return the map filled by {@code readContent}
 * @throws IOException if {@code path} does not exist or a file system call fails
 */
public HashMap<E, V> read() throws IOException {
    final boolean pathExists = fs.exists(path);
    if (!pathExists) {
        throw new IOException(path + " does not exist!");
    }

    for (FileStatus entry : fs.listStatus(path)) {
        // Short-circuit keeps the name lookup from running for directories.
        boolean isDataFile = !entry.isDirectory() && !"_SUCCESS".equals(entry.getPath().getName());
        if (isDataFile) {
            readContent(entry);
        }
    }
    return map;
}
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration) throws FileNotFoundException, IOException { FileStatus[] status = fs.listStatus(src); Map<String, Path> paths = new TreeMap<String, Path>(); for (FileStatus fileStatus : status) { Path path = fileStatus.getPath(); String pathName = path.getName(); if (pathName.startsWith(Constants.NAME_SECOND)) { paths.put(pathName, path);//from w w w. j a va 2 s. c o m } } File outFile = new File(outPath, Names.indexId2Node + ".dat"); OutputStream out = new FileOutputStream(outFile); for (String pathName : paths.keySet()) { Path path = new Path(src, paths.get(pathName)); log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile()); InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat")); IOUtils.copyBytes(in, out, configuration, false); in.close(); } out.close(); }
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration) throws FileNotFoundException, IOException { // Find all the right paths and copy .gz files locally FileStatus[] status = fs.listStatus(src); Map<String, Path> paths = new TreeMap<String, Path>(); for (FileStatus fileStatus : status) { Path path = fileStatus.getPath(); String pathName = path.getName(); if (pathName.startsWith(Constants.NAME_FOURTH)) { paths.put(pathName, path);// w w w. j a v a2s . c o m } } for (String pathName : paths.keySet()) { Path path = new Path(src, paths.get(pathName)); status = fs.listStatus(path); for (FileStatus fileStatus : status) { Path p = fileStatus.getPath(); log.debug("Copying {} to {}...", p.toUri(), outPath); fs.copyToLocalFile(p, new Path(outPath, p.getName())); } } // Merge .gz files into indexName.gz File fileOutputPath = new File(outPath); File[] files = fileOutputPath.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.getName().endsWith(".gz"); } }); Arrays.sort(files); String prevIndexName = null; OutputStream out = null; for (File file : files) { log.debug("Processing {}... 
", file.getName()); String indexName = file.getName().substring(0, file.getName().indexOf("_")); if (prevIndexName == null) prevIndexName = indexName; if (out == null) out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz"))); if (!prevIndexName.equals(indexName)) { if (out != null) out.close(); log.debug("Index name set to {}", indexName); out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz"))); } InputStream in = new GZIPInputStream(new FileInputStream(file)); log.debug("Copying {} into {}.gz ...", file.getName(), indexName); IOUtils.copyBytes(in, out, 8192, false); in.close(); file.delete(); prevIndexName = indexName; } if (out != null) out.close(); // build B+Tree indexes Location location = new Location(outPath); for (String idxName : Constants.indexNames) { log.debug("Creating {} index...", idxName); String indexFilename = location.absolute(idxName, "gz"); if (new File(indexFilename).exists()) { new File(outPath, idxName + ".dat").delete(); new File(outPath, idxName + ".idn").delete(); CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename); // To save some disk space new File(indexFilename).delete(); } } }
From source file:cmd.tdbloader4.java
License:Apache License
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException { log.debug("Creating offsets file..."); Map<Long, Long> offsets = new TreeMap<Long, Long>(); FileStatus[] status = fs.listStatus(new Path(input)); for (FileStatus fileStatus : status) { Path file = fileStatus.getPath(); if (file.getName().startsWith("part-r-")) { log.debug("Processing: {}", file.getName()); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file))); String line = in.readLine(); String[] tokens = line.split("\\s"); long partition = Long.valueOf(tokens[0]); long offset = Long.valueOf(tokens[1]); log.debug("Partition {} has offset {}", partition, offset); offsets.put(partition, offset); }//from w w w . j av a 2 s . c o m } Path outputPath = new Path(output, Constants.OFFSETS_FILENAME); PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath))); for (Long partition : offsets.keySet()) { out.println(partition + "\t" + offsets.get(partition)); } out.close(); log.debug("Offset file created."); }
From source file:cn.lhfei.hadoop.ch03.ShowFileStatusTest.java
License:Apache License
/**
 * Checks that the status returned for {@code /dir/file} reports that same path and is flagged
 * as a regular file.
 *
 * @throws IOException if the status lookup fails
 */
@Test
public void fileStatusForFile() throws IOException {
    Path path = new Path("/dir/file");
    FileStatus stat = fs.getFileStatus(path);

    // JUnit's argument order is (expected, actual); keeping it makes failure messages accurate.
    Assert.assertEquals("/dir/file", stat.getPath().toUri().getPath());
    Assert.assertTrue(stat.isFile());
    // The block size is read but deliberately not asserted.
    stat.getBlockSize();
}
From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java
License:Apache License
/** * Get the input splits for a stream./*from w w w . jav a 2 s .co m*/ * * @param conf Configuration of the filesystem the stream resides on. * @return List of input splits for the stream. * @throws IOException */ public List<T> getSplits(Configuration conf) throws IOException { List<T> splits = Lists.newArrayList(); // Collects all stream event files timestamp, size and block locations information // First grab all directories (partition) that matches with the time range. FileSystem fs = path.getFileSystem(conf); for (FileStatus partitionStatus : fs.listStatus(path)) { // partition should be directory String pathName = partitionStatus.getPath().getName(); if (!partitionStatus.isDirectory() || !StreamUtils.isPartition(pathName)) { continue; } // Match the time range long partitionStartTime = StreamUtils.getPartitionStartTime(pathName); long partitionEndTime = StreamUtils.getPartitionEndTime(pathName); if (partitionStartTime > endTime || partitionEndTime <= startTime) { continue; } // Collects all bucket file status in the partition. Collection<StreamDataFileSplitter> eventFiles = collectBuckets(fs, partitionStatus.getPath()); // For each bucket inside the partition directory, compute the splits for (StreamDataFileSplitter splitter : eventFiles) { splitter.computeSplits(fs, minSplitSize, maxSplitSize, startTime, endTime, splits, splitFactory); } } return splits; }
From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java
License:Apache License
/**
 * Gathers a splitter for every event file found directly under a partition directory.
 *
 * @param fs            file system used for the listing
 * @param partitionPath directory to list
 * @return one {@code StreamDataFileSplitter} per entry whose name matches
 *         {@code StreamFileType.EVENT}, in listing order
 * @throws IOException if the listing fails
 */
private Collection<StreamDataFileSplitter> collectBuckets(FileSystem fs, Path partitionPath)
        throws IOException {
    final ImmutableList.Builder<StreamDataFileSplitter> splitters = ImmutableList.builder();
    final FileStatus[] entries = fs.listStatus(partitionPath);
    for (int i = 0; i < entries.length; i++) {
        final String fileName = entries[i].getPath().getName();
        if (!StreamFileType.EVENT.isMatched(fileName)) {
            continue;
        }
        splitters.add(new StreamDataFileSplitter(entries[i]));
    }
    return splitters.build();
}