List of usage examples for org.apache.hadoop.fs.ContentSummary.getFileCount()
public long getFileCount()
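Before the full examples below, here is a minimal, self-contained sketch of the basic call. The path and the choice of local filesystem are assumptions for illustration only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileCountExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Local filesystem for demonstration; FileSystem.get(conf) would target HDFS instead.
        FileSystem fs = FileSystem.getLocal(conf);
        Path dir = new Path("/tmp/example-dir"); // hypothetical path
        // getContentSummary walks the tree rooted at dir and aggregates file,
        // directory, and byte counts in one pass.
        ContentSummary summary = fs.getContentSummary(dir);
        System.out.println("files: " + summary.getFileCount()
                + ", dirs: " + summary.getDirectoryCount()
                + ", bytes: " + summary.getLength());
    }
}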
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtilTest.java
License:Apache License
@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtil ch = new DistributedCacheUtil();

    Configuration conf = new Configuration();
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, true, Arrays.asList(pluginDir));
        Path pluginInstallPath = new Path(pluginsDir, pluginDir.getURL().toURI().getPath());
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(3, summary.getFileCount());
        assertEquals(2, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
From source file:org.pentaho.hadoop.shim.common.DistributedCacheTestUtil.java
License:Apache License
/**
 * Utility to attempt to stage a file to HDFS for use with Distributed Cache.
 *
 * @param ch                Distributed Cache Helper
 * @param source            File or directory to stage
 * @param fs                FileSystem to stage to
 * @param root              Root directory to clean up when this test is complete
 * @param dest              Destination path to stage to
 * @param expectedFileCount Expected number of files to exist in the destination once staged
 * @param expectedDirCount  Expected number of directories to exist in the destination once staged
 * @throws Exception
 */
static void stageForCacheTester(DistributedCacheUtilImpl ch, FileObject source, FileSystem fs, Path root,
        Path dest, int expectedFileCount, int expectedDirCount) throws Exception {
    try {
        ch.stageForCache(source, fs, dest, true);

        assertTrue(fs.exists(dest));
        ContentSummary cs = fs.getContentSummary(dest);
        assertEquals(expectedFileCount, cs.getFileCount());
        assertEquals(expectedDirCount, cs.getDirectoryCount());
        assertEquals(FsPermission.createImmutable((short) 0755), fs.getFileStatus(dest).getPermission());
    } finally {
        // Clean up after ourselves
        if (!fs.delete(root, true)) {
            System.err.println("error deleting FileSystem temp dir " + root);
        }
    }
}
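A hypothetical invocation of this helper (the source folder and the expected counts of 6 files and 6 directories are assumptions, not values from the original test) might look like:

stageForCacheTester(ch, DistributedCacheTestUtil.createTestFolderWithContent(), fs, root, dest, 6, 6);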
From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImplOSDependentTest.java
License:Apache License
@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, "bin/test/sample-folder");
        Path pluginInstallPath = new Path(pluginsDir, "bin/test/sample-folder");
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(6, summary.getFileCount());
        assertEquals(6, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
From source file:org.slc.sli.aggregation.mapreduce.map.ValueMapperTest.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testMap() throws Exception {
    TenantAndIdEmittableKey key = new TenantAndIdEmittableKey();
    ValueMapper m = new MockValueMapper();
    BSONObject entry = new BasicBSONObject("found", "data");
    BSONWritable entity = new BSONWritable(entry);

    Context context = Mockito.mock(Context.class);
    PowerMockito.when(context, "write", Matchers.any(EmittableKey.class), Matchers.any(BSONObject.class))
            .thenAnswer(new Answer<BSONObject>() {
                @Override
                public BSONObject answer(InvocationOnMock invocation) throws Throwable {
                    Object[] args = invocation.getArguments();
                    assertNotNull(args);
                    assertEquals(args.length, 2);
                    assertTrue(args[0] instanceof TenantAndIdEmittableKey);
                    assertTrue(args[1] instanceof ContentSummary);

                    TenantAndIdEmittableKey id = (TenantAndIdEmittableKey) args[0];
                    assertNotNull(id);
                    ContentSummary e = (ContentSummary) args[1];
                    assertEquals(e.getLength(), 1);
                    assertEquals(e.getFileCount(), 2);
                    assertEquals(e.getDirectoryCount(), 3);
                    return null;
                }
            });

    m.map(key, entity, context);
}
From source file:test.hiveserver.parse.SemanticAnalyzer.java
License:Apache License
private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {

    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }

    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }

    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);

            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }

            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // none of the mapred tasks needs to be run locally. That means that the
        // query can be executed entirely in local mode. Save the current tracker
        // value and restore it when done.
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.
        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         *     t.localizeMRTmpFiles(ctx);
         */
    }
}
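As a side note, the input-size estimate above reduces to simple arithmetic over the ContentSummary. A standalone sketch with hypothetical configuration values (none taken from a real query) illustrates the global-limit branch:

public class LocalModeEstimateSketch {
    public static void main(String[] args) {
        long globalLimit = 1000;            // LIMIT rows (hypothetical)
        long sizePerRow = 100;              // HIVELIMITMAXROWSIZE in bytes (hypothetical)
        long minSplitSize = 128L << 20;     // MAPREDMINSPLITSIZE, 128 MB (hypothetical)
        long totalInputLength = 512L << 20; // ContentSummary.getLength() (hypothetical)

        long estimatedInput = globalLimit * sizePerRow;             // bytes needed to satisfy the limit
        long estimatedNumMap = totalInputLength / minSplitSize + 1; // rough mapper count: 4 + 1 = 5
        estimatedInput = estimatedInput * (estimatedNumMap + 1);    // scale by mappers, as in the code above

        // 1000 * 100 = 100,000 bytes; 100,000 * (5 + 1) = 600,000 bytes
        System.out.println("estimated input: " + estimatedInput + " bytes");
    }
}

This estimate, together with ContentSummary.getFileCount(), is what MapRedTask.isEligibleForLocalMode uses to decide whether the job is small enough to run locally.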