List of usage examples for org.apache.hadoop.fs FileSystem getContentSummary
public ContentSummary getContentSummary(Path f) throws IOException
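Before the real-world usages below, here is a minimal, self-contained sketch of the call (not taken from any of the source files that follow), assuming an HDFS reachable through the default configuration; the path /data/logs is hypothetical and stands in for your own directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/data/logs"); // hypothetical path, substitute your own

        // getContentSummary walks the whole subtree rooted at dir,
        // so it can be expensive on large directory trees.
        ContentSummary cs = fs.getContentSummary(dir);

        System.out.println("files:                  " + cs.getFileCount());
        System.out.println("directories:            " + cs.getDirectoryCount());
        System.out.println("length (logical bytes): " + cs.getLength());
        System.out.println("space consumed (bytes): " + cs.getSpaceConsumed()); // includes replication

        fs.close();
    }
}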
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtilTest.java
License:Apache License
@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtil ch = new DistributedCacheUtil();

    Configuration conf = new Configuration();
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, true, Arrays.asList(pluginDir));
        Path pluginInstallPath = new Path(pluginsDir, pluginDir.getURL().toURI().getPath());
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(3, summary.getFileCount());
        assertEquals(2, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
From source file:org.pentaho.hadoop.shim.common.DistributedCacheTestUtil.java
License:Apache License
/**
 * Utility to attempt to stage a file to HDFS for use with the Distributed Cache.
 *
 * @param ch                Distributed Cache Helper
 * @param source            File or directory to stage
 * @param fs                FileSystem to stage to
 * @param root              Root directory to clean up when this test is complete
 * @param dest              Destination path to stage to
 * @param expectedFileCount Expected number of files to exist in the destination once staged
 * @param expectedDirCount  Expected number of directories to exist in the destination once staged
 * @throws Exception
 */
static void stageForCacheTester(DistributedCacheUtilImpl ch, FileObject source, FileSystem fs, Path root,
        Path dest, int expectedFileCount, int expectedDirCount) throws Exception {
    try {
        ch.stageForCache(source, fs, dest, true);

        assertTrue(fs.exists(dest));
        ContentSummary cs = fs.getContentSummary(dest);
        assertEquals(expectedFileCount, cs.getFileCount());
        assertEquals(expectedDirCount, cs.getDirectoryCount());
        assertEquals(FsPermission.createImmutable((short) 0755), fs.getFileStatus(dest).getPermission());
    } finally {
        // Clean up after ourselves
        if (!fs.delete(root, true)) {
            System.err.println("error deleting FileSystem temp dir " + root);
        }
    }
}
From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImplOSDependentTest.java
License:Apache License
@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, "bin/test/sample-folder");
        Path pluginInstallPath = new Path(pluginsDir, "bin/test/sample-folder");
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(6, summary.getFileCount());
        assertEquals(6, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
From source file:org.roc.hdfs.bolt.rules.FileSizeRotationPolicy.java
License:Apache License
@Override
public boolean mark(FileSystem fs, Path path, FileNameFormat format) {
    long size = 0L;
    try {
        size = fs.getContentSummary(path).getLength();
    } catch (IOException e) {
        LOG.warn("Error getting file length", e);
        return false;
    }
    // rotate once the file reaches the configured size threshold
    return size >= this.maxBytes;
}
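A note on this pattern: getContentSummary() computes a full recursive summary of the path. For a single file, as in this rotation policy, fs.getFileStatus(path).getLen() yields the same length more cheaply, so if mark() is invoked frequently (say, per written tuple), the lighter call is probably the better fit.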
From source file:org.schedoscope.metascope.task.metastore.MetastoreTask.java
License:Apache License
private Long getDirectorySize(FileSystem fs, String path) {
    try {
        return fs.getContentSummary(new Path(path)).getSpaceConsumed();
    } catch (FileNotFoundException e) {
        LOG.warn("Directory '{}' does not exist", path);
        return 0L;
    } catch (IOException e) {
        LOG.error("Error retrieving size for directory '{}'", path, e);
        return 0L;
    }
}
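Note the choice of accessor here: getSpaceConsumed() counts raw bytes across all replicas on HDFS (roughly logical length times the replication factor), while getLength() counts logical file bytes only. Which one means "the size of a directory" depends on whether you are reporting cluster capacity usage or data volume.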
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public Map<Path, ContentSummary> count(final boolean quota, String... uris) {
    final Map<Path, ContentSummary> results = new PrettyPrintMap<Path, ContentSummary>(uris.length,
            new MapPrinter<Path, ContentSummary>() {
                @Override
                public String toString(Path p, ContentSummary c) throws IOException {
                    return c.toString(quota) + p;
                }
            });

    for (String src : uris) {
        try {
            Path srcPath = new Path(src);
            final FileSystem fs = getFS(srcPath);
            FileStatus[] statuses = fs.globStatus(srcPath);
            Assert.notEmpty(statuses, "Can not find listing for " + src);
            for (FileStatus s : statuses) {
                Path p = s.getPath();
                results.put(p, fs.getContentSummary(p));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
        }
    }

    return Collections.unmodifiableMap(results);
}
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
public Map<Path, Long> du(final boolean summary, String... strings) {
    if (ObjectUtils.isEmpty(strings)) {
        strings = new String[] { "." };
    }

    final int BORDER = 2;

    Map<Path, Long> results = new PrettyPrintMap<Path, Long>(strings.length, new MapPrinter<Path, Long>() {
        @Override
        public String toString(Path path, Long size) throws Exception {
            if (summary) {
                return ("".equals(path) ? "." : path) + "\t" + size;
            }
            return String.format("%-" + (10 + BORDER) + "d", size) + path;
        }
    });

    try {
        for (String src : strings) {
            Path srcPath = new Path(src);
            FileSystem srcFs = getFS(srcPath);
            FileStatus[] fileStatus = srcFs.globStatus(srcPath);
            if (summary) {
                for (FileStatus status : fileStatus) {
                    results.put(status.getPath(), srcFs.getContentSummary(status.getPath()).getLength());
                }
            } else {
                FileStatus items[] = srcFs.listStatus(FileUtil.stat2Paths(fileStatus, srcPath));
                if (ObjectUtils.isEmpty(items) && (!srcFs.exists(srcPath))) {
                    throw new HadoopException("Cannot access " + src + ": No such file or directory.");
                }
                for (FileStatus status : items) {
                    Long size = (status.isDir() ? srcFs.getContentSummary(status.getPath()).getLength()
                            : status.getLen());
                    results.put(status.getPath(), size);
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot inspect resources " + ex.getMessage(), ex);
    }

    return Collections.unmodifiableMap(results);
}
From source file:simsql.runtime.MRLoader.java
License:Apache License
public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {

    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;

    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // find a file name
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());
    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }
    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }

    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);
    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);

    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }

    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);
    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }

    // now, delete all the empty part files
    try {
        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) {
        // this isn't disastrous
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}
From source file:simsql.runtime.RelOp.java
License:Apache License
public static long getPathsTotalSize(String[] paths) {
    try {
        // get a configuration and a fileSystem
        Configuration conf = new Configuration();
        FileSystem dfs = FileSystem.get(conf);
        long totalSize = 0;

        for (String s : paths) {
            Path path = new Path(s);
            if (dfs.exists(path)) {
                totalSize += dfs.getContentSummary(path).getLength();
            }
        }

        return totalSize;
    } catch (Exception e) {
        return 0;
    }
}
From source file:simsql.runtime.RelOp.java
License:Apache License
public long getSplitSize(RuntimeParameter params) {

    // default value = fileSize / numProcessors
    ExampleRuntimeParameter p = (ExampleRuntimeParameter) params;
    Configuration conf = new Configuration();
    long dfsBlockSize = (long) conf.getInt("dfs.blocksize", 128 * 1024 * 1024);

    try {
        // get a configuration and a fileSystem
        FileSystem dfs = FileSystem.get(conf);
        long totalSize = 0;

        for (String s : myInputNetwork.getPipelinedInputFiles()) {
            Path path = new Path(s);
            if (dfs.exists(path)) {
                totalSize += dfs.getContentSummary(path).getLength();
            }
        }

        // if it's too small, just use a block.
        if (totalSize < dfsBlockSize) {
            return dfsBlockSize;
        }

        // otherwise, divide
        return totalSize / p.getNumCPUs();
    } catch (Exception e) {
        // if we fail, just return the DFS block size!!!
        return dfsBlockSize;
    }
}