List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
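getPath() returns the Path of the file or directory that a FileStatus describes; as the examples below show, it is most often called while iterating the results of FileSystem.listStatus(). A minimal sketch of that pattern (the "/tmp" directory and the default Configuration are assumptions for illustration, not part of any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPaths {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Each FileStatus returned by listStatus() carries the entry's Path
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}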
From source file:TestIndexMergeMR.java
License:Open Source License
public void testIndexMergeMR() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    String indexdir = "indexdir";
    String indexdir1 = "indexdir1";
    int filenum = 10;
    int recnum = 1000;
    short idx = 0;
    TestUtil.genifdfindex(indexdir, filenum, recnum, idx, true);

    // Join the paths of all index files into a comma-separated list
    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.running(sb.substring(0, sb.length() - 1), indexdir1, conf);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir1 + "/part-00000");
    for (int i = 0; i < 100; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir), true);
    fs.delete(new Path(indexdir1), true);
}
From source file:DupleInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // times that each file appears in the files list
    ArrayList<Integer> times = new ArrayList<Integer>();
    ArrayList<Path> paths = new ArrayList<Path>();
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            int index;
            if ((index = paths.indexOf(path)) != -1) {
                times.set(index, times.get(index) + 1);
            } else {
                times.add(0);
                paths.add(path);
                index = times.size() - 1;
            }
            // not splitable
            splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), times.get(index)));
        } else {
            // Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    //LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file:Vectors.java
License:Apache License
public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fileStatus : fs.listStatus(path)) {
        if (fileStatus.getPath().getName().contains("part-")) {
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf);
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                reader.next(key, value);
                return value.get();
            } finally {
                IOUtils.closeStream(reader);
            }
        }
    }
    return null;
}
From source file:UtilIndexMR.java
License:Open Source License
public static void indexmrtest(String datadir, String indexdir, int filenum, int recnum, boolean var,
        boolean compress, boolean seq, boolean overwrite, boolean column, String idx, boolean removefile)
        throws Exception {
    if (column) {
        UtilIndexStorage.writeColumnFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite);
    } else {
        UtilIndexStorage.writeFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite);
    }

    // Join the paths of all data files into a comma-separated list
    FileStatus[] ss = fs.listStatus(new Path(datadir));
    StringBuffer sb = new StringBuffer();
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    System.out.println(sb.toString());

    IndexMR.running(conf, sb.substring(0, sb.length() - 1), column, idx, indexdir);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir + "/part-00000");
    ifdf.seek(filenum * recnum / 2);
    for (int i = 0; i < 10; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir + "/_logs"), true);
    if (removefile) {
        fs.delete(new Path(datadir), true);
        fs.delete(new Path(indexdir), true);
    }
}
From source file:AggregatedLogsPurger.java
License:Apache License
public boolean purge() throws IOException {
    LocalDateTime now = LocalDateTime.now();
    LocalDateTime deleteLogsOlderThanTime = now.minusDays(deleteOlderThanDays);

    // Identify which log dirs should be deleted
    FileSystem fs = rootLogDir.getFileSystem(conf);
    try {
        long totalBytes = 0;
        for (FileStatus userDir : fs.listStatus(rootLogDir)) {
            if (userDir.isDirectory()) {
                Path userDirPath = new Path(userDir.getPath(), suffix);
                System.out.println("Checking for userDir : " + userDirPath);
                for (FileStatus appDir : fs.listStatus(userDirPath)) {
                    LocalDateTime appDirDate = getAppDirDateTime(appDir.getModificationTime());
                    if (appDirDate.isBefore(deleteLogsOlderThanTime)) {
                        long size = getLengthRecursively(fs, appDir.getPath());
                        System.out.println(appDir.getPath() + ", " + appDir.getOwner() + ", "
                                + appDirDate.toString() + ", size=" + size);
                        totalBytes += size;
                        if (shouldDelete) {
                            System.out.println("Deleting " + appDir.getPath());
                            fs.delete(appDir.getPath(), true);
                        }
                    }
                }
            }
        }
        System.out.println("Savings : " + totalBytes);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        fs.close();
    }
    return true;
}
From source file:AggregatedLogsPurger.java
License:Apache License
private long getLengthRecursively(FileSystem fs, Path path) throws IOException {
    long size = 0;
    for (FileStatus status : fs.listStatus(path)) {
        if (status.isDirectory()) {
            // Accumulate the subtree size; the original dropped the recursive
            // call's return value, so directory contents were never counted.
            size += getLengthRecursively(fs, status.getPath());
        } else {
            size += status.getLen();
        }
    }
    return size;
}
From source file:DisplayClustering.java
License:Apache License
protected static void loadClustersWritable(Path output) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    for (FileStatus s : fs.listStatus(output, new ClustersFilter())) {
        List<Cluster> clusters = readClustersWritable(s.getPath());
        CLUSTERS.add(clusters);
    }
}
From source file:RawParascaleFileSystem.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
        throws IOException {
    ChunkLocator newChunkLocator = null;
    if (file.getLen() < start + len) {
        throw new IOException("start+len must be less or equal than file length");
    }
    final ArrayList<BlockLocation> locations = new ArrayList<BlockLocation>();
    try {
        newChunkLocator = newChunkLocator();
        final Path makeQualified = file.getPath().makeQualified(this.getUri(), this.getWorkingDirectory());
        // sorted by offset
        final ChunkLocation[] chunkLocations = newChunkLocator.getChunkLocations(pathToFile(makeQualified),
                getVirtualFSFromPath(makeQualified, true));
        long begin = start;
        long length = len;
        for (final ChunkLocation chunkLocation : chunkLocations) {
            final ChunkInfo chunkInfo = chunkLocation.getChunkInfo();
            final StorageNodeInfo[] storageNodeInfo = chunkLocation.getStorageNodeInfo();
            if (length <= 0) {
                // stop once the requested range is fully covered
                break;
            }
            if (begin < chunkInfo.getChunkOffset()) {
                // skip if location not reached yet
                continue;
            }
            final List<String> hosts = new ArrayList<String>(0);
            for (int j = 0; j < storageNodeInfo.length; j++) {
                // select all enabled and running nodes
                if (storageNodeInfo[j].isUp() && storageNodeInfo[j].isEnabled()) {
                    hosts.add(storageNodeInfo[j].getNodeName());
                }
            }
            final long lengthInChunk = chunkInfo.getChunkLength() - (begin - chunkInfo.getChunkOffset());
            final BlockLocation blockLocation = new BlockLocation(null, hosts.toArray(new String[0]), begin,
                    lengthInChunk < length ? lengthInChunk : length);
            begin += blockLocation.getLength();
            length -= blockLocation.getLength();
            locations.add(blockLocation);
        }
        if (pLog.isDebugEnabled()) {
            pLog.debug("Fetched " + locations.size() + " chunk locations for " + makeQualified);
        }
        return locations.toArray(new BlockLocation[0]);
    } catch (final ChunkStorageException e) {
        // Parenthesize the null check: the original applied + before ==, so the
        // comparison was always false and a null locator would throw an NPE here.
        throw new IOException("can not fetch chunk locations "
                + (newChunkLocator == null ? "" : newChunkLocator.toString()), e);
    } finally {
        if (newChunkLocator != null) {
            newChunkLocator.close();
        }
    }
}
From source file:a.TestConcatExample.java
License:Apache License
@Test
public void concatIsPermissive() throws IOException, URISyntaxException {
    MiniDFSCluster cluster = null;
    final Configuration conf = WebHdfsTestUtil.createConf();
    conf.set("dfs.namenode.fs-limits.min-block-size", "1000"); // Allow tiny blocks for the test
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
        cluster.waitActive();
        final FileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsFileSystem.SCHEME);
        final FileSystem dfs = cluster.getFileSystem();

        final FileSystem fs = dfs; // WebHDFS has a bug in getLocatedBlocks

        Path root = new Path("/dir");
        fs.mkdirs(root);

        short origRep = 3;
        short secondRep = (short) (origRep - 1);
        Path f1 = new Path("/dir/f1");
        long size1 = writeFile(fs, f1, /* blocksize */ 4096, origRep, 5);
        long f1NumBlocks = fs.getFileBlockLocations(f1, 0, size1).length;
        assertEquals(5, f1NumBlocks);

        Path f2 = new Path("/dir/f2");
        long size2 = writeFile(fs, f2, /* blocksize (must be a multiple of 512 for checksum) */ 4096 - 512,
                secondRep, 4);
        long f2NumBlocks = fs.getFileBlockLocations(f2, 0, size2).length;
        assertEquals(5, f2NumBlocks);

        fs.concat(f1, new Path[] { f2 });

        FileStatus[] fileStatuses = fs.listStatus(root);

        // Only one file should remain
        assertEquals(1, fileStatuses.length);
        FileStatus fileStatus = fileStatuses[0];

        // And it should be named after the first file
        assertEquals("f1", fileStatus.getPath().getName());

        // The entire file takes the replication of the first argument
        assertEquals(origRep, fileStatus.getReplication());

        // As expected, the new concatenated file is the length of both previous files
        assertEquals(size1 + size2, fileStatus.getLen());

        // And we should have the same number of blocks
        assertEquals(f1NumBlocks + f2NumBlocks,
                fs.getFileBlockLocations(fileStatus.getPath(), 0, size1 + size2).length);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
From source file:acromusashi.stream.bolt.hdfs.HdfsPreProcessor.java
License:Open Source License
/**
 * Pre-processing for HdfsSink.<br>
 * Searches the base directory for temporary files left over from a previous run
 * and renames them back to their final file names.
 *
 * @param hdfs      target file system
 * @param baseUrl   base directory URL
 * @param baseName  base file name
 * @param tmpSuffix temporary-file suffix
 */
public static void execute(FileSystem hdfs, String baseUrl, String baseName, String tmpSuffix) {
    String baseRealUrl = baseUrl;
    if (baseRealUrl.endsWith("/") == false) {
        baseRealUrl = baseRealUrl + "/";
    }

    String targetPattern = baseRealUrl + baseName + "[0-9]*" + tmpSuffix + "*";
    Path targetPathPattern = new Path(targetPattern);

    FileStatus[] targetTmpFiles = null;
    try {
        targetTmpFiles = hdfs.globStatus(targetPathPattern);
    } catch (IOException ioex) {
        logger.warn("Failed to search preprocess target files. Skip preprocess.", ioex);
        return;
    }

    if (targetTmpFiles.length == 0) {
        String logFormat = "Preprocess target files not exist. Path={0}";
        String logMessage = MessageFormat.format(logFormat, targetPattern);
        logger.info(logMessage);
        return;
    }

    if (logger.isInfoEnabled() == true) {
        printTargetPathList(targetTmpFiles);
    }

    for (FileStatus targetTmpFile : targetTmpFiles) {
        renameTmpFile(hdfs, targetTmpFile.getPath().toString(), tmpSuffix);
    }
}