Example usage for org.apache.hadoop.fs FileSystem getContentSummary

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.getContentSummary(Path).

Prototype

public ContentSummary getContentSummary(Path f) throws IOException 

Source Link

Document

Return the ContentSummary of a given Path.
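
Before the project examples below, a minimal, self-contained sketch of the basic call may help. The class name and the path "/tmp/example-dir" are illustrative placeholders rather than anything taken from the projects quoted on this page; the ContentSummary accessors shown (getLength, getFileCount, getDirectoryCount, getSpaceConsumed) are part of the standard Hadoop API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws Exception {
        // Obtain the FileSystem configured by the default Hadoop configuration
        // (the local file system unless fs.defaultFS points at a cluster).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path only; replace with a path that exists on your file system.
        Path dir = new Path("/tmp/example-dir");

        // getContentSummary walks the subtree rooted at the path and aggregates
        // its total length, file count, directory count, and space consumed.
        ContentSummary summary = fs.getContentSummary(dir);
        System.out.println("length (bytes)  : " + summary.getLength());
        System.out.println("file count      : " + summary.getFileCount());
        System.out.println("directory count : " + summary.getDirectoryCount());
        System.out.println("space consumed  : " + summary.getSpaceConsumed());
    }
}

The project examples that follow use the same call against both local and HDFS file systems, typically to assert staged file and directory counts in tests or to measure the total size of input paths.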

Usage

From source file: org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtilTest.java

License: Apache License

@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtil ch = new DistributedCacheUtil();

    Configuration conf = new Configuration();
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");

    FileObject pluginDir = createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, true, Arrays.asList(pluginDir));
        Path pluginInstallPath = new Path(pluginsDir, pluginDir.getURL().toURI().getPath());
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(3, summary.getFileCount());
        assertEquals(2, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}

From source file: org.pentaho.hadoop.shim.common.DistributedCacheTestUtil.java

License: Apache License

/**
 * Utility to attempt to stage a file to HDFS for use with Distributed Cache.
 *
 * @param ch                Distributed Cache Helper
 * @param source            File or directory to stage
 * @param fs                FileSystem to stage to
 * @param root              Root directory to clean up when this test is complete
 * @param dest              Destination path to stage to
 * @param expectedFileCount Expected number of files to exist in the destination once staged
 * @param expectedDirCount  Expected number of directories to exist in the destination once staged
 * @throws Exception
 */
static void stageForCacheTester(DistributedCacheUtilImpl ch, FileObject source, FileSystem fs, Path root,
        Path dest, int expectedFileCount, int expectedDirCount) throws Exception {
    try {
        ch.stageForCache(source, fs, dest, true);

        assertTrue(fs.exists(dest));
        ContentSummary cs = fs.getContentSummary(dest);
        assertEquals(expectedFileCount, cs.getFileCount());
        assertEquals(expectedDirCount, cs.getDirectoryCount());
        assertEquals(FsPermission.createImmutable((short) 0755), fs.getFileStatus(dest).getPermission());
    } finally {
        // Clean up after ourself
        if (!fs.delete(root, true)) {
            System.err.println("error deleting FileSystem temp dir " + root);
        }
    }
}

From source file: org.pentaho.hadoop.shim.common.DistributedCacheUtilImplOSDependentTest.java

License: Apache License

@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");

    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, "bin/test/sample-folder");
        Path pluginInstallPath = new Path(pluginsDir, "bin/test/sample-folder");
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(6, summary.getFileCount());
        assertEquals(6, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}

From source file: org.roc.hdfs.bolt.rules.FileSizeRotationPolicy.java

License: Apache License

@Override
public boolean mark(FileSystem fs, Path path, FileNameFormat format) {
    long size = 0L;
    try {
        size = fs.getContentSummary(path).getLength();
    } catch (IOException e) {
        LOG.warn("Error when get file's length  ", e);
        return false;
    }
    return size >= this.maxBytes;
}

From source file: org.schedoscope.metascope.task.metastore.MetastoreTask.java

License: Apache License

private Long getDirectorySize(FileSystem fs, String path) {
    try {
        return fs.getContentSummary(new Path(path)).getSpaceConsumed();
    } catch (FileNotFoundException e) {
        LOG.warn("Directory '{}' does not exists", path);
        return 0L;
    } catch (IOException e) {
        LOG.error("Error retrieving size for directory '{}'", path, e);
        return 0L;
    }
}

From source file: org.springframework.data.hadoop.fs.FsShell.java

License: Apache License

public Map<Path, ContentSummary> count(final boolean quota, String... uris) {

    final Map<Path, ContentSummary> results = new PrettyPrintMap<Path, ContentSummary>(uris.length,
            new MapPrinter<Path, ContentSummary>() {
                @Override
                public String toString(Path p, ContentSummary c) throws IOException {
                    return c.toString(quota) + p;
                }
            });

    for (String src : uris) {
        try {
            Path srcPath = new Path(src);
            final FileSystem fs = getFS(srcPath);
            FileStatus[] statuses = fs.globStatus(srcPath);
            Assert.notEmpty(statuses, "Can not find listing for " + src);
            for (FileStatus s : statuses) {
                Path p = s.getPath();
                results.put(p, fs.getContentSummary(p));
            }
        } catch (IOException ex) {
            throw new HadoopException("Cannot find listing " + ex.getMessage(), ex);
        }
    }

    return Collections.unmodifiableMap(results);
}

From source file: org.springframework.data.hadoop.fs.FsShell.java

License: Apache License

public Map<Path, Long> du(final boolean summary, String... strings) {
    if (ObjectUtils.isEmpty(strings)) {
        strings = new String[] { "." };
    }

    final int BORDER = 2;

    Map<Path, Long> results = new PrettyPrintMap<Path, Long>(strings.length, new MapPrinter<Path, Long>() {

        @Override
        public String toString(Path path, Long size) throws Exception {
            if (summary) {
                return ("".equals(path) ? "." : path) + "\t" + size;
            }
            return String.format("%-" + (10 + BORDER) + "d", size) + path;
        }
    });

    try {
        for (String src : strings) {
            Path srcPath = new Path(src);
            FileSystem srcFs = getFS(srcPath);
            FileStatus[] fileStatus = srcFs.globStatus(srcPath);
            if (summary) {
                for (FileStatus status : fileStatus) {
                    results.put(status.getPath(), srcFs.getContentSummary(status.getPath()).getLength());
                }
            } else {
                FileStatus items[] = srcFs.listStatus(FileUtil.stat2Paths(fileStatus, srcPath));
                if (ObjectUtils.isEmpty(items) && (!srcFs.exists(srcPath))) {
                    throw new HadoopException("Cannot access " + src + ": No such file or directory.");
                }
                for (FileStatus status : items) {
                    Long size = (status.isDir() ? srcFs.getContentSummary(status.getPath()).getLength()
                            : status.getLen());
                    results.put(status.getPath(), size);
                }
            }
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot inspect resources " + ex.getMessage(), ex);
    }

    return Collections.unmodifiableMap(results);
}

From source file: simsql.runtime.MRLoader.java

License: Apache License

public long run(String inputPath, String outputPath, short typeCode, Relation r, int sortAtt) {

    // make a directory for the relation
    Configuration conf = new Configuration();
    FileSystem dfs = null;

    try {
        dfs = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot access HDFS!", e);
    }

    try {
        // if it exists, destroy it.
        Path path = new Path(outputPath);
        if (dfs.exists(path)) {
            dfs.delete(path, true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Could not create the file to bulk load to!", e);
    }

    // find a file name 
    String tempPath = null;
    if (inputPath.startsWith("hdfs:")) {
        tempPath = inputPath.replace("hdfs:", "");
    } else {
        tempPath = "/tempDataFile_" + r.getName();
        try {
            dfs.delete(new Path(tempPath), true);
        } catch (Exception e) {
            // ignore this.
        }

        // upload the text file
        try {
            dfs.copyFromLocalFile(false, true, new Path(inputPath), new Path(tempPath));
            dfs.deleteOnExit(new Path(tempPath));
        } catch (Exception e) {
            throw new RuntimeException("Failed to upload text file " + inputPath + " to HDFS!", e);
        }
    }

    // set up the new job's parameters.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", RecordCompression.getCodecClass());

    conf.set("io.serializations",
            "simsql.runtime.RecordSerialization,simsql.runtime.RecordKeySerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt("simsql.loader.numAtts", r.getAttributes().size());
    conf.setInt("simsql.loader.typeCode", (int) typeCode);
    conf.setInt("simsql.loader.sortAtt", sortAtt);

    String[] myStrings = new String[r.getAttributes().size()];
    int j = 0;
    for (simsql.compiler.Attribute a : r.getAttributes()) {
        myStrings[j++] = a.getPhysicalRealization().getClass().getName();
    }

    conf.setStrings("simsql.loader.types", myStrings);

    // create a job
    Job job;
    try {
        job = new Job(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create bulk loading job!", e);
    }

    // set the split size (number of mappers)
    long fSize = 0;
    if (inputPath.startsWith("hdfs")) {
        fSize = RelOp.getPathsTotalSize(new String[] { tempPath });
    } else {
        fSize = new File(inputPath).length();
    }

    FileInputFormat.setMinInputSplitSize(job, fSize / (long) numTasks);
    FileInputFormat.setMaxInputSplitSize(job, fSize / (long) numTasks);

    // and the number of reducers
    job.setNumReduceTasks(numTasks);

    // the mapper/reducer/jar
    job.setMapperClass(MRLoaderMapper.class);
    job.setReducerClass(MRLoaderReducer.class);
    job.setJarByClass(MRLoader.class);

    // I/O settings.
    job.setOutputFormatClass(RecordOutputFormat.class);

    job.setMapOutputKeyClass(RecordKey.class);
    job.setMapOutputValueClass(RecordWrapper.class);
    job.setOutputKeyClass(Nothing.class);
    job.setOutputValueClass(Record.class);
    try {
        FileInputFormat.setInputPaths(job, new Path(tempPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } catch (Exception e) {
        throw new RuntimeException("Could not set job inputs/outputs", e);
    }
    job.setGroupingComparatorClass(RecordKeyGroupingComparator.class);
    job.setPartitionerClass(RecordPartitioner.class);
    job.setSortComparatorClass(RecordKeySortComparator.class);

    job.setJobName("MRLoader: " + inputPath + " ==> " + outputPath);

    // run it
    Counters counters;
    try {
        job.waitForCompletion(true);
        counters = job.getCounters();
    } catch (Exception e) {
        throw new RuntimeException("Could not set up bulk loader job!", e);
    }

    // now, delete all the empty part files
    try {

        // get a filesystem
        FileSystem ddfs = FileSystem.get(conf);
        Path outPath = new Path(outputPath);
        if (ddfs.exists(outPath) && ddfs.isDirectory(outPath)) {
            FileStatus fstatus[] = ddfs.listStatus(outPath, new TableFileFilter());
            for (FileStatus ff : fstatus) {
                if (ddfs.getContentSummary(ff.getPath()).getLength() <= 4) { // snappy leaves 4-byte long files around...
                    ddfs.delete(ff.getPath(), true);
                }
            }
        }
    } catch (Exception e) { // this isn't disastrous 
    }

    // get the counter for the output of the mapper.
    Counter bytesCounter = counters.findCounter(OutputFileSerializer.Counters.BYTES_WRITTEN);
    return bytesCounter.getValue();
}

From source file: simsql.runtime.RelOp.java

License: Apache License

public static long getPathsTotalSize(String[] paths) {
    try {

        // get a configuration and a fileSystem
        Configuration conf = new Configuration();
        FileSystem dfs = FileSystem.get(conf);

        long totalSize = 0;
        for (String s : paths) {
            Path path = new Path(s);
            if (dfs.exists(path)) {
                totalSize += dfs.getContentSummary(path).getLength();
            }
        }

        // return
        return totalSize;
    } catch (Exception e) {
        return 0;
    }
}

From source file: simsql.runtime.RelOp.java

License: Apache License

public long getSplitSize(RuntimeParameter params) {

    // default value = fileSize / numProcessors
    ExampleRuntimeParameter p = (ExampleRuntimeParameter) params;
    Configuration conf = new Configuration();
    long dfsBlockSize = (long) conf.getInt("dfs.blocksize", 128 * 1024 * 1024);

    try {

        // get a configuration and a fileSystem
        FileSystem dfs = FileSystem.get(conf);

        long totalSize = 0;
        for (String s : myInputNetwork.getPipelinedInputFiles()) {
            Path path = new Path(s);
            if (dfs.exists(path)) {
                totalSize += dfs.getContentSummary(path).getLength();
            }
        }

        // if it's too small, just use a block.
        if (totalSize < dfsBlockSize)
            return dfsBlockSize;

        // otherwise, divide
        return totalSize / p.getNumCPUs();

    } catch (Exception e) {

        // if we fail, just return the DFS block size!!!
        return (long) dfsBlockSize;
    }
}